Merge branch 'master' of git-ce.rwth-aachen.de:wasels.chr/damask3

0c3f0283 · Brian Christopher Wasels · 80737c45 · 2f11c65a · 0c3f0283 · 0c3f0283
Commit 0c3f0283 authored 3 years ago by Brian Christopher Wasels
--- a/Bericht/Bilder/UNet_Architecture.png
+++ b/Bericht/Bilder/UNet_Architecture.png
--- a/Bericht/Bilder/channels.jpg
+++ b/Bericht/Bilder/channels.jpg
--- a/Notes.txt
+++ b/Notes.txt
@@ -12,6 +12,9 @@ V13: 4 layer, doppel Conv, normDataen,phase 64
 V14: 4 layer, single conv, normDataen,phase + angle 64
 V15: 3 layer, doppelte depth Conv pro layer, norm. Daten,kernel 7, phase only, dropout 0.3, 32
 V16: 3 layer, doppelte depth Conv pro layer, norm. Daten,kernel 7, angelsonly, dropout 0.5, 32
+V17: 3 layer, doppelte depth Conv pro layer, norm. Daten,kernel 7, angles only, dropout 0.5, 32, but last layer 70 32 1 1
+V18: 3 layer, doppelte depth Conv pro layer, norm. Daten,kernel 7, angles only, dropout 0.5, 32, like 16 but first layer 6 32 32
+
 V9 mit kernel 7 und nur den phasen:
 	mean error over whole set: 16.91116704929035
 	max error average: 292.8658473955995 and maximum 814.873957640188

--- a/UNet/.#Train_model.sh
+++ b/UNet/.#Train_model.sh
-yk138599@login18-x-1.hpc.itc.rwth-aachen.de.71560:1644816141
\ No newline at end of file
--- a/UNet/Train_model.sh
+++ b/UNet/Train_model.sh
@@ -6,7 +6,7 @@
 #SBATCH --partition=c18g

 #SBATCH -J training_model
-#SBATCH -o Sim_logs/UNet_64_V14_%J.log
+#SBATCH -o Sim_logs/UNet_V12_%J.log
 
 #SBATCH --gres=gpu:1
 #SBATCH --time=90:00:00
@@ -16,8 +16,7 @@
 module load cuda
 module load python/3.7.11
 pip3 install --user -Iv -q torch==1.10.1
-#time python3 ./UNet_V12.py
-#time python3 ./UNet_V13.py
-time python3 ./UNet_V14.py
+
+time python3 ./UNet_V12.py
 #print GPU Information
 #$CUDA_ROOT/extras/demo_suite/deviceQuery -noprompt
--- a/UNet/2_Train_model.sh
+++ b/UNet/2_Train_model.sh
@@ -6,17 +6,18 @@
 #SBATCH --partition=c18g

 #SBATCH -J training_model
-#SBATCH -o Sim_logs/UNet_V10_%J.log
+#SBATCH -o Sim_logs/UNet_V17_%J.log
 
 #SBATCH --gres=gpu:1
-#SBATCH --time=50:00:00
+#SBATCH --time=90:00:00
 ### Request memory you need for your job in MB
-#SBATCH --mem-per-cpu=10000
+#SBATCH --mem-per-cpu=20000
 #SBATCH --mem-per-gpu=16000
 module load cuda
 module load python/3.7.11
-echo "9.1 k=7 lr=1e-06"
 pip3 install --user -Iv -q torch==1.10.1
-time python3 ./UNet_V10.py
+#time python3 ./UNet_V12.py
+time python3 ./UNet_V17.py
+#time python3 ./UNet_V14.py
 #print GPU Information
 #$CUDA_ROOT/extras/demo_suite/deviceQuery -noprompt
--- a/UNet/3_Train_model.sh
+++ b/UNet/3_Train_model.sh
+#!/usr/local_rwth/bin/zsh
+### Project account
+#SBATCH --account=rwth0744
+
+### Cluster Partition
+#SBATCH --partition=c18g
+
+#SBATCH -J training_model
+#SBATCH -o Sim_logs/UNet_V15_%J.log
+ 
+#SBATCH --gres=gpu:1
+#SBATCH --time=90:00:00
+### Request memory you need for your job in MB
+#SBATCH --mem-per-cpu=20000
+#SBATCH --mem-per-gpu=16000
+module load cuda
+module load python/3.7.11
+pip3 install --user -Iv -q torch==1.10.1
+
+time python3 ./UNet_V15.py
+#print GPU Information
+#$CUDA_ROOT/extras/demo_suite/deviceQuery -noprompt
--- a/UNet/Train_model2.sh
+++ b/UNet/Train_model2.sh
@@ -6,7 +6,7 @@
 #SBATCH --partition=c18g

 #SBATCH -J training_model
-#SBATCH -o Sim_logs/UNet_64_V16_%J.log
+#SBATCH -o Sim_logs/UNet_V16_%J.log
 
 #SBATCH --gres=gpu:1
 #SBATCH --time=90:00:00
@@ -16,8 +16,7 @@
 module load cuda
 module load python/3.7.11
 pip3 install --user -Iv -q torch==1.10.1
+
 time python3 ./UNet_V16.py
-#time python3 ./UNet_V13.py
-#time python3 ./UNet_V14.py
 #print GPU Information
 #$CUDA_ROOT/extras/demo_suite/deviceQuery -noprompt
--- a/UNet/5_Train_model.sh
+++ b/UNet/5_Train_model.sh
+#!/usr/local_rwth/bin/zsh
+### Project account
+#SBATCH --account=rwth0744
+
+### Cluster Partition
+#SBATCH --partition=c18g
+
+#SBATCH -J training_model
+#SBATCH -o Sim_logs/UNet_V16_K3_%J.log
+ 
+#SBATCH --gres=gpu:1
+#SBATCH --time=90:00:00
+### Request memory you need for your job in MB
+#SBATCH --mem-per-cpu=20000
+#SBATCH --mem-per-gpu=16000
+module load cuda
+module load python/3.7.11
+pip3 install --user -Iv -q torch==1.10.1
+
+time python3 ./UNet_V16.py
+#print GPU Information
+#$CUDA_ROOT/extras/demo_suite/deviceQuery -noprompt
--- a/UNet/Sim_logs/UNet_64_V12_25614663.log
+++ b/UNet/Sim_logs/UNet_64_V12_25614663.log
-(OK) Loading cuda 10.2.89
-(OK) Loading python 3.7.11
-(!!) The SciPy Stack is available: http://www.scipy.org/stackspec.html
- Built with GCC compilers.
-Collecting torch==1.10.1
-  Using cached torch-1.10.1-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)
-Collecting typing-extensions
-  Using cached typing_extensions-4.1.1-py3-none-any.whl (26 kB)
-Installing collected packages: typing-extensions, torch
-  WARNING: The scripts convert-caffe2-to-onnx, convert-onnx-to-caffe2 and torchrun are installed in '/home/yk138599/.local/bin' which is not on PATH.
-  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
-Successfully installed torch-1.10.1 typing-extensions-4.1.1
-WARNING: You are using pip version 21.2.4; however, version 22.0.3 is available.
-You should consider upgrading via the '/usr/local_rwth/sw/python/3.7.11/x86_64/bin/python3.7 -m pip install --upgrade pip' command.
-number auf epochs: 500
-batchsize: 32
-learning rate: 3e-05
-kernel size is: 9
- seed is: 2518441936
-Traceback (most recent call last):
-  File "./UNet_V12.py", line 250, in <module>
-    history = fit(num_epochs, lr, model, train_dl, valid_dl,f'{path_to_rep}/UNet/output', opt_func)
-  File "./UNet_V12.py", line 165, in fit
-    loss = model.training_step(batch)
-  File "./UNet_V12.py", line 108, in training_step
-    out = self(input)                  # Generate predictions
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
-    return forward_call(*input, **kwargs)
-  File "./UNet_V12.py", line 147, in forward
-    out      = self.decoder(enc_ftrs[::-1][0], enc_ftrs[::-1][1:])
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
-    return forward_call(*input, **kwargs)
-  File "./UNet_V12.py", line 93, in forward
-    x        = self.dec_blocks[i](x)
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
-    return forward_call(*input, **kwargs)
-  File "./UNet_V12.py", line 29, in forward
-    x = self.batch_norm_1(self.relu(self.pointwise_1(self.depthwise_1(x))))
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
-    return forward_call(*input, **kwargs)
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 590, in forward
-    return self._conv_forward(input, self.weight, self.bias)
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 586, in _conv_forward
-    input, weight, bias, self.stride, self.padding, self.dilation, self.groups
-RuntimeError: CUDA out of memory. Tried to allocate 512.00 MiB (GPU 0; 15.78 GiB total capacity; 14.15 GiB already allocated; 280.50 MiB free; 14.16 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
-python3 ./UNet_V12.py  4.92s user 5.82s system 16% cpu 1:06.40 total
--- a/UNet/Sim_logs/UNet_64_V13_25614318.log
+++ b/UNet/Sim_logs/UNet_64_V13_25614318.log
-(OK) Loading cuda 10.2.89
-(OK) Loading python 3.7.11
-(!!) The SciPy Stack is available: http://www.scipy.org/stackspec.html
- Built with GCC compilers.
-Collecting torch==1.10.1
-  Using cached torch-1.10.1-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)
-Collecting typing-extensions
-  Using cached typing_extensions-4.1.1-py3-none-any.whl (26 kB)
-Installing collected packages: typing-extensions, torch
-  WARNING: The scripts convert-caffe2-to-onnx, convert-onnx-to-caffe2 and torchrun are installed in '/home/yk138599/.local/bin' which is not on PATH.
-  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
-Successfully installed torch-1.10.1 typing-extensions-4.1.1
-WARNING: You are using pip version 21.2.4; however, version 22.0.3 is available.
-You should consider upgrading via the '/usr/local_rwth/sw/python/3.7.11/x86_64/bin/python3.7 -m pip install --upgrade pip' command.
-number auf epochs: 500
-batchsize: 16
-learning rate: 3e-05
-kernel size is: 9
- seed is: 2628832979
-Epoch [0], train_loss: 0.271159, val_loss: 0.268970, val_acc: 0.014685
-Epoch [1], train_loss: 0.269396, val_loss: 0.270057, val_acc: 0.073417
-Epoch [2], train_loss: 0.268085, val_loss: 0.279366, val_acc: 0.120659
-Epoch [3], train_loss: 0.266099, val_loss: 0.254583, val_acc: 0.435022
-Epoch [4], train_loss: 0.263552, val_loss: 0.256586, val_acc: 0.376657
-Epoch [5], train_loss: 0.261619, val_loss: 0.242178, val_acc: 0.313965
-Epoch [6], train_loss: 0.260539, val_loss: 0.247519, val_acc: 0.305485
-Epoch [7], train_loss: 0.259419, val_loss: 0.248480, val_acc: 0.254837
-Epoch [8], train_loss: 0.258631, val_loss: 0.247978, val_acc: 0.210317
-Epoch [9], train_loss: 0.257922, val_loss: 0.255808, val_acc: 0.172337
-Epoch [10], train_loss: 0.257285, val_loss: 0.252549, val_acc: 0.182081
-Epoch [11], train_loss: 0.256655, val_loss: 0.258195, val_acc: 0.166881
-Epoch [12], train_loss: 0.256037, val_loss: 0.265417, val_acc: 0.211055
-Epoch [13], train_loss: 0.255511, val_loss: 0.254048, val_acc: 0.176106
-Epoch [14], train_loss: 0.254910, val_loss: 0.249992, val_acc: 0.237055
-Epoch [15], train_loss: 0.254372, val_loss: 0.251587, val_acc: 0.127559
-Epoch [16], train_loss: 0.253764, val_loss: 0.260919, val_acc: 0.167581
-Epoch [17], train_loss: 0.253268, val_loss: 0.259768, val_acc: 0.206201
-python3 ./UNet_V13.py  1570.35s user 1560.30s system 96% cpu 53:54.66 total
--- a/UNet/Sim_logs/UNet_64_V14_25617675.log
+++ b/UNet/Sim_logs/UNet_64_V14_25617675.log
-(OK) Loading cuda 10.2.89
-(OK) Loading python 3.7.11
-(!!) The SciPy Stack is available: http://www.scipy.org/stackspec.html
- Built with GCC compilers.
-Collecting torch==1.10.1
-  Using cached torch-1.10.1-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)
-Collecting typing-extensions
-  Using cached typing_extensions-4.1.1-py3-none-any.whl (26 kB)
-Installing collected packages: typing-extensions, torch
-  WARNING: The scripts convert-caffe2-to-onnx, convert-onnx-to-caffe2 and torchrun are installed in '/home/yk138599/.local/bin' which is not on PATH.
-  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
-Successfully installed torch-1.10.1 typing-extensions-4.1.1
-WARNING: You are using pip version 21.2.4; however, version 22.0.3 is available.
-You should consider upgrading via the '/usr/local_rwth/sw/python/3.7.11/x86_64/bin/python3.7 -m pip install --upgrade pip' command.
-number auf epochs: 500
-batchsize: 32
-learning rate: 3e-05
-kernel size is: 9
- seed is: 1197567716
-Traceback (most recent call last):
-  File "./UNet_V14.py", line 249, in <module>
-Traceback (most recent call last):
-  File "/rwthfs/rz/SW/UTIL.common/Python/3.7.11/x86_64/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
-    send_bytes(obj)
-  File "/rwthfs/rz/SW/UTIL.common/Python/3.7.11/x86_64/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
-    self._send_bytes(m[offset:offset + size])
-  File "/rwthfs/rz/SW/UTIL.common/Python/3.7.11/x86_64/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
-    self._send(header + buf)
-  File "/rwthfs/rz/SW/UTIL.common/Python/3.7.11/x86_64/lib/python3.7/multiprocessing/connection.py", line 368, in _send
-    n = write(self._handle, buf)
-BrokenPipeError: [Errno 32] Broken pipe
-    history = fit(num_epochs, lr, model, train_dl, valid_dl,f'{path_to_rep}/UNet/output', opt_func)
-  File "./UNet_V14.py", line 163, in fit
-    for batch in train_loader:
-  File "./UNet_V14.py", line 201, in __iter__
-    yield to_device(b, self.device)
-  File "./UNet_V14.py", line 189, in to_device
-    return [to_device(x, device) for x in data]
-  File "./UNet_V14.py", line 189, in <listcomp>
-    return [to_device(x, device) for x in data]
-  File "./UNet_V14.py", line 190, in to_device
-    return data.to(device, non_blocking=True)
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
-    _error_if_any_worker_fails()
-RuntimeError: DataLoader worker (pid 53817) is killed by signal: Killed. 
-python3 ./UNet_V14.py  6.29s user 14.51s system 17% cpu 2:00.50 total
-slurmstepd: error: Detected 1 oom-kill event(s) in step 25617675.batch cgroup. Some of your processes may have been killed by the cgroup out-of-memory handler.
--- a/UNet/Sim_logs/UNet_64_V16_25621936.log
+++ b/UNet/Sim_logs/UNet_64_V16_25621936.log
--- a/UNet/Sim_logs/UNet_64_V14_25621929.log
+++ b/UNet/Sim_logs/UNet_64_V14_25621929.log
@@ -7,29 +7,8 @@ Collecting torch==1.10.1
 Collecting typing-extensions
  Using cached typing_extensions-4.1.1-py3-none-any.whl (26 kB)
 Installing collected packages: typing-extensions, torch
-  WARNING: The scripts convert-caffe2-to-onnx, convert-onnx-to-caffe2 and torchrun are installed in '/home/yk138599/.local/bin' which is not on PATH.
-  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
-Successfully installed torch-1.10.1 typing-extensions-4.1.1
+ERROR: Could not install packages due to an OSError: [Errno 116] Stale file handle
+
 WARNING: You are using pip version 21.2.4; however, version 22.0.3 is available.
 You should consider upgrading via the '/usr/local_rwth/sw/python/3.7.11/x86_64/bin/python3.7 -m pip install --upgrade pip' command.
-number auf epochs: 500
-batchsize: 32
-learning rate: 3e-05
-kernel size is: 9
- seed is: 1383180841
-Traceback (most recent call last):
-  File "./UNet_V14.py", line 249, in <module>
-    history = fit(num_epochs, lr, model, train_dl, valid_dl,f'{path_to_rep}/UNet/output', opt_func)
-  File "./UNet_V14.py", line 170, in fit
-    result = evaluate(model, val_loader)
-  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
-    return func(*args, **kwargs)
-  File "./UNet_V14.py", line 153, in evaluate
-    outputs = [model.validation_step(batch) for batch in val_loader]
-  File "./UNet_V14.py", line 153, in <listcomp>
-    outputs = [model.validation_step(batch) for batch in val_loader]
-  File "./UNet_V14.py", line 115, in validation_step
-    acc = accuracy(out.detach(), labels.detach())         # Calculate accuracy
-TypeError: accuracy() missing 1 required positional argument: 'normalization'
-terminate called without an active exception
-python3 ./UNet_V14.py  42.18s user 50.52s system 45% cpu 3:24.39 total
+python3 ./UNet_V12.py  0.44s user 0.19s system 15% cpu 4.079 total
--- a/UNet/Sim_logs/UNet_V12_25657700.log
+++ b/UNet/Sim_logs/UNet_V12_25657700.log
--- a/UNet/Sim_logs/UNet_V13_25657882.log
+++ b/UNet/Sim_logs/UNet_V13_25657882.log
--- a/UNet/Sim_logs/UNet_V15_25648398.log
+++ b/UNet/Sim_logs/UNet_V15_25648398.log
--- a/UNet/Sim_logs/UNet_V15_25678820.log
+++ b/UNet/Sim_logs/UNet_V15_25678820.log
--- a/UNet/Sim_logs/UNet_V16_25648002.log
+++ b/UNet/Sim_logs/UNet_V16_25648002.log
--- a/UNet/Sim_logs/UNet_64_V13_25614634.log
+++ b/UNet/Sim_logs/UNet_64_V13_25614634.log
@@ -7,15 +7,14 @@ Collecting torch==1.10.1
 Collecting typing-extensions
  Using cached typing_extensions-4.1.1-py3-none-any.whl (26 kB)
 Installing collected packages: typing-extensions, torch
-  WARNING: The scripts convert-caffe2-to-onnx, convert-onnx-to-caffe2 and torchrun are installed in '/home/yk138599/.local/bin' which is not on PATH.
-  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.
-Successfully installed torch-1.10.1 typing-extensions-4.1.1
+ERROR: Could not install packages due to an OSError: [Errno 116] Stale file handle
+
 WARNING: You are using pip version 21.2.4; however, version 22.0.3 is available.
 You should consider upgrading via the '/usr/local_rwth/sw/python/3.7.11/x86_64/bin/python3.7 -m pip install --upgrade pip' command.
 Traceback (most recent call last):
-  File "./UNet_V14.py", line 10, in <module>
+  File "./UNet_V16.py", line 10, in <module>
    import torch
  File "/home/yk138599/.local/lib/python3.7/site-packages/torch/__init__.py", line 197, in <module>
    from torch._C import *  # noqa: F403
 ImportError: /home/yk138599/.local/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so: cannot read file data
-python3 ./UNet_V14.py  0.14s user 0.06s system 47% cpu 0.420 total
+python3 ./UNet_V16.py  0.13s user 0.04s system 31% cpu 0.537 total