Skip to content

Commit 96b8f7e

Browse files
committed
Updated configuration
1 parent ac24d1d commit 96b8f7e

9 files changed

Lines changed: 111 additions & 466 deletions

File tree

config/infer.yaml

Whitespace-only changes.

config/test.yaml

Whitespace-only changes.

config/train.yaml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Training configuration for the DeepSpeech2 ASR model.
# Loaded by train code as config["train"] and expanded into ModelWrapper(**config_dict).
train:
  train-manifest: 'examples/manifests/train_manifest.csv'
  val-manifest: 'examples/manifests/val_manifest.csv'
  labels-path: 'examples/labels.json'   # Contains all characters for transcription
  log-dir: 'logs'                       # Location for log files
  def-dir: 'examples/checkpoints/'      # Default location to save/load models (stray trailing comma removed — invalid YAML)
  model-name: 'deepspeech_final.pth'    # File name to save the best model
  load-from: 'deepspeech_final.pth'     # File name containing a checkpoint to continue/finetune

  sample-rate: 16000        # Sample rate
  window-size: 0.02         # Window size for spectrogram in seconds
  window-stride: 0.01       # Window stride for spectrogram in seconds
  window: 'hamming'         # Window type for spectrogram generation

  batch-size: 32            # Batch size for training
  hidden-size: 800          # Hidden size of RNNs
  hidden-layers: 5          # Number of RNN layers
  rnn-type: 'gru'           # Type of the RNN unit: gru|lstm are supported

  max-epochs: 70            # Number of training epochs
  learning-rate: 3.0e-4     # Initial learning rate (mantissa dot required: PyYAML's YAML-1.1 resolver parses bare '3e-4' as a string)
  momentum: 0.9             # Momentum
  max-norm: 800             # Norm cutoff to prevent explosion of gradients
  learning-anneal: 1.1      # Annealing applied to learning rate every epoch (was '1.1n', a typo that parsed as a string)
  sortaGrad: True           # Turn on ordering of dataset on sequence length for the first epoch

  checkpoint: True          # Enables checkpoint saving of model
  checkpoint-per-epoch: 1   # Save checkpoint per x epochs
  silent: False             # Turn off progress tracking per iteration
  continue: False           # Continue training with a pre-trained model
  finetune: False           # Finetune a pre-trained model

  num-data-workers: 8       # Number of workers used in data-loading
  augment: False            # Use random tempo and gain perturbations
  shuffle: True             # Turn on shuffling and sample from dataset based on sequence length (smallest to largest)

  seed: 123456              # Seed to generators
  cuda: True                # Use cuda to train model
  half-precision: True      # Uses half precision to train a model (was 'Trues', a typo that parsed as the string "Trues")
  apex: True                # Uses mixed precision to train a model
  static-loss-scaling: False   # Static loss scale for mixed precision
  dynamic-loss-scaling: True   # Use dynamic loss scaling for mixed precision

  dist-url: 'tcp://127.0.0.1:1550'  # URL used to set up distributed training
  dist-backend: 'nccl'              # Distributed backend
  world-size: 1                     # Number of distributed processes
  rank: 0                           # The rank of the current process
  gpu-rank: 0                       # If using distributed parallel for multi-gpu, sets the GPU for the process

infer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Command-line entry point for ASR inference on a single wave file.

Reads the inference section of a YAML config, builds a ModelWrapper from it,
and prints the transcription of the wave file named by the config's
``wave_path`` key (presumably a path string — confirm against config schema).
"""
import argparse
import os
import wave
from typing import Dict

import yaml

from modelwrapper import ModelWrapper

parser = argparse.ArgumentParser(description='ASR inference')
parser.add_argument('--config', metavar='DIR',
                    help='Path to inference config file', default='config/infer.yaml')

if __name__ == '__main__':
    args = parser.parse_args()
    with open(args.config, 'r') as file:
        # safe_load: yaml.load without an explicit Loader is deprecated and can
        # execute arbitrary code on a crafted config file.
        config = yaml.safe_load(file)
    config_dict: Dict = config["infer"]
    model = ModelWrapper(**config_dict)
    # Hoist the lookup: the original called .get("wave_path") three times.
    wave_path = config_dict.get("wave_path")
    if wave_path is not None and os.path.isfile(wave_path):
        # Context manager closes the wave file; the original leaked the handle.
        with wave.open(wave_path) as sound:
            print(model.infer(sound))
    else:
        print("Wave file not found!")

loader.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# ----------------------------------------------------------------------------
2+
# Based on SeanNaren's deepspeech.pytorch:
3+
# https://github.com/SeanNaren/deepspeech.pytorch
4+
# ----------------------------------------------------------------------------
5+
16
import math
27
import warnings
38
from typing import Tuple

models/deepspeech2.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# ----------------------------------------------------------------------------
2+
# Based on SeanNaren's deepspeech.pytorch:
3+
# https://github.com/SeanNaren/deepspeech.pytorch
4+
# ----------------------------------------------------------------------------
5+
16
import math
27
from collections import OrderedDict
38

test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Command-line entry point for evaluating an ASR model.

Reads the test section of a YAML config, builds a ModelWrapper from it,
and runs the wrapper's test routine.
"""
import argparse
from typing import Dict

import yaml

from modelwrapper import ModelWrapper

parser = argparse.ArgumentParser(description='ASR testing')
parser.add_argument('--config', metavar='DIR',
                    help='Path to test config file', default='config/test.yaml')

if __name__ == '__main__':
    args = parser.parse_args()
    with open(args.config, 'r') as file:
        # safe_load: yaml.load without an explicit Loader is deprecated and can
        # execute arbitrary code on a crafted config file.
        config = yaml.safe_load(file)
    config_dict: Dict = config["test"]
    model = ModelWrapper(**config_dict)
    model.test()

0 commit comments

Comments
 (0)