diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c74ad51 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2017, Josef Novak +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/bin/phonetisaurus-align.cc b/src/bin/phonetisaurus-align.cc index 7c44d0f..e26fda2 100644 --- a/src/bin/phonetisaurus-align.cc +++ b/src/bin/phonetisaurus-align.cc @@ -63,8 +63,7 @@ int load_input_file (M2MFstAligner* aligner, string input_file, lines++; } infile.close (); - } - else { + } else { cerr << "Failed to open input file: " << input_file << endl; return -1; } @@ -213,7 +212,7 @@ void compileNBestFarArchive (M2MFstAligner* aligner, set_syms = true; } - sprintf (keybuf, "%0*d", generate_keys, i+1); + snprintf (keybuf, sizeof (keybuf), "%0*d", generate_keys, i+1); key = keybuf; //Write the final result to the FARchive diff --git a/src/bin/phonetisaurus_apply b/src/bin/phonetisaurus_apply new file mode 100755 index 0000000..66c22f0 --- /dev/null +++ b/src/bin/phonetisaurus_apply @@ -0,0 +1,308 @@ +#!/usr/bin/env python +# -*- mode: python; coding: utf-8 -*- +from __future__ import print_function +import os, logging, subprocess, time, re +from datetime import datetime +from collections import defaultdict +import tempfile + +class G2PModelTester () : + """G2P Model training wrapper class. + + Phonetisaurus G2P modeling training wrapper class. + This wraps the alignment, joint n-gram training, and ARPA to + WFST conversion steps into one command. + """ + + def __init__ (self, model, **kwargs) : + self.model = model + self.lexicon_file = kwargs.get ("lexicon", None) + self.nbest = kwargs.get ("nbest", 1) + self.thresh = kwargs.get ("thresh", 99) + self.beam = kwargs.get ("beam", 10000) + self.greedy = kwargs.get ("greedy", False) + self.verbose = kwargs.get ("verbose", False) + self.logger = self.setupLogger () + + def setupLogger (self) : + """Setup the logger and logging level. + + Setup the logger and logging level. We only support + verbose and non-verbose mode. + + Args: + verbose (bool): Verbose mode, or not. + + Returns: + Logger: A configured logger instance. 
+ """ + + level = logging.DEBUG if self.verbose else logging.INFO + logging.basicConfig ( + level=level, + format="\033[94m%(levelname)s:%(name)s:"\ + "%(asctime)s\033[0m: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + + return logging.getLogger ("phonetisaurus-apply") + + def _loadLexicon (self) : + """Load the lexicon from a file. + + Load the reference lexicon from a file, and store it + in a defaultdict (list). + """ + + _lexicon = defaultdict (list) + if not self.lexicon_file : + return _lexicon + + self.logger.debug ("Loading lexicon from file...") + with open (self.lexicon_file, "r") as ifp : + for line in ifp : + line = line.decode ("utf8").strip () + word, pron = re.split (ur"\t", line) + _lexicon [word].append (pron) + + return _lexicon + + def checkPhonetisaurusConfig (self) : + """Run some basic checks before training. + + Run some basic checks regarding the $PATH, environment, + and provided data before starting training. + + Raises: + EnvironmentError: raised if binaries are not found. + """ + + self.logger.debug ("Checking command configuration...") + for program in ["phonetisaurus-g2pfst"] : + if not self.which (program) : + raise EnvironmentError, "Phonetisaurus command, '{0}', "\ + "not found in path.".format (program) + + if self.lexicon_file and not os.path.exists (self.lexicon_file) : + self.logger.error ("Could not find provided lexicon file.") + sys.exit (1) + + for key,val in sorted (vars (self).iteritems ()) : + self.logger.debug (u"{0}: {1}".format (key, val)) + + self.lexicon = self._loadLexicon () + + return + + def which (self, program) : + """Basic 'which' implementation for python. + + Basic 'which' implementation for python from stackoverflow: + * https://stackoverflow.com/a/377028/6739158 + + Args: + program (str): The program name to search the $PATH for. + + Returns: + path/None: The path to the executable, or None. 
+ """ + + def is_exe (fpath) : + return os.path.isfile (fpath) and os.access (fpath, os.X_OK) + + fpath, fname = os.path.split (program) + if fpath: + if is_exe (program): + return program + else: + for path in os.environ["PATH"].split (os.pathsep) : + path = path.strip ('"') + exe_file = os.path.join (path, program) + if is_exe (exe_file): + return exe_file + + return None + + def makeG2PCommand (self, word_list) : + """Build the G2P command. + + Build the G2P command from the provided arguments. + + Returns: + list: The command in subprocess list format. + """ + + command = [ + u"phonetisaurus-g2pfst", + u"--model={0}".format (self.model), + u"--nbest={0}".format (self.nbest), + u"--beam={0}".format (self.beam), + u"--thresh={0}".format (self.thresh), + u"--wordlist={0}".format (word_list) + ] + + self.logger.debug (u" ".join (command)) + + return command + + def runG2PCommand (self, word_list_file) : + """Generate and run the actual G2P command. + + Generate and run the actual G2P command. Each synthesized + entry will be yielded back on-the-fly via the subprocess + stdout readline method. + + Args: + word_list_file (str): The input word list. + """ + g2p_command = self.makeG2PCommand (word_list_file) + + self.logger.debug ("Applying G2P model...") + + with open (os.devnull, "w") as devnull : + proc = subprocess.Popen ( + g2p_command, + stdout=subprocess.PIPE, + stderr=devnull if not self.verbose else None + ) + + for line in iter (proc.stdout.readline, "") : + parts = re.split (ur"\t", line.decode ("utf8").strip ()) + if not len (parts) == 3 : + self.logger.warning ( + u"No pronunciation for word: '{0}'".format (parts [0]) + ) + continue + + yield parts + + return + + def applyG2POnly (self, word_list_file) : + """Apply the G2P model to a word list. + + Apply the G2P model to a word list. No filtering or application + of a reference lexicon is used here. + + Args: + word_list_file (str): The input word list. 
+ """ + for word, score, pron in self.runG2PCommand (word_list_file) : + line = u"" + if self.verbose : + line = u"{0}\t{1:.2f}\t{2}".format ( + word, float (score), pron + ) + else : + line = u"{0}\t{1}".format (word, pron) + print (line.encode ("utf8")) + + return + + def applyG2PWithLexicon (self, word_list_file) : + """Apply the G2P model to a word list, combined with lexicon. + + Apply the G2P model to a word list, but combine this with + a reference lexicon. Words for which a reference entry exists + will not be sent to the G2P, unless the additional '--greedy' + flag is set to True. + + Args: + word_list_file (str): The input word list. + """ + target_lexicon = defaultdict (list) + tmpwordlist = tempfile.NamedTemporaryFile (delete=False) + + #First, find any words in the target list for which we already + # have a canonical pronunciation in the reference lexicon. + with open (word_list_file, "r") as ifp : + for word in ifp : + word = word.decode ("utf8").strip () + if word in self.lexicon : + target_lexicon [word] = [(0.0,pron) + for pron in self.lexicon [word]] + #In greedy mode we still send words to the G2P, even + # if we have canonical entries in the reference lexicon. + if self.greedy : + print (word.encode ("utf8"), file=tmpwordlist) + else : + print (word.encode ("utf8"), file=tmpwordlist) + tmpwordlist.close () + + #Second, iterate through the G2P output, and filter against + # any possible duplicates previously found in the reference lexicon. + for word, score, pron in self.runG2PCommand (tmpwordlist.name) : + prons = set ([p for s,p in target_lexicon [word]]) + if pron in prons : + continue + target_lexicon [word].append ((score, pron)) + + #Finally, sort everything that is left and print it. 
+ for word in sorted (target_lexicon.keys ()) : + for score, pron in target_lexicon [word] : + line = u"" + if self.verbose : + line = u"{0}\t{1:.2f}\t{2}".format ( + word, float (score), pron + ) + else : + line = u"{0}\t{1}".format (word, pron) + print (line.encode ("utf8")) + + os.unlink (tmpwordlist.name) + return + + def ApplyG2PModel (self, word_list_file) : + """Apply the G2P model to a word list. + + Apply the G2P model to a word list. + + Args: + word_list_file (str): The input word list. + """ + self.checkPhonetisaurusConfig () + + if not os.path.exists (word_list_file) \ + or not os.path.isfile (word_list_file) : + raise IOError, "Word list file not found." + + if len (self.lexicon) == 0 : + self.applyG2POnly (word_list_file) + else : + self.applyG2PWithLexicon (word_list_file) + + return + +if __name__ == "__main__" : + import sys, argparse + + example = "{0} --model train/model.fst --word test".format (sys.argv [0]) + + parser = argparse.ArgumentParser (description=example) + parser.add_argument ("--model", "-m", help="Phonetisaurus G2P fst model.", + required=True) + parser.add_argument ("--lexicon", "-l", help="Optional reference lexicon.", + required=False) + parser.add_argument ("--nbest", "-n", help="Nbest highest order.", + default=1, type=int) + parser.add_argument ("--beam", "-b", help="Search 'beam'.", + default=10000, type=int) + parser.add_argument ("--thresh", "-t", help="Pruning threshold for n-best.", + default=99.0, type=float) + parser.add_argument ("--greedy", "-g", help="Use the G2P even if a " + "reference lexicon has been provided.", default=False, + action="store_true") + parser.add_argument ("--word_list", "-wl", help="Input word or word list to apply " + "G2P model to.", type=str) + + parser.add_argument ("--verbose", "-v", help="Verbose mode.", + default=False, action="store_true") + args = parser.parse_args () + + tester = G2PModelTester ( + args.model, + **{key:val for key,val in args.__dict__.iteritems () + if not key in 
["model","word_list"]} + ) + + tester.ApplyG2PModel (args.word_list) diff --git a/src/bin/phonetisaurus_train b/src/bin/phonetisaurus_train new file mode 100755 index 0000000..fcbc408 --- /dev/null +++ b/src/bin/phonetisaurus_train @@ -0,0 +1,350 @@ +#!/usr/bin/env python +# -*- mode: python; coding: utf-8 -*- +from __future__ import print_function +import os, logging, subprocess, time, re +from datetime import datetime + + +class G2PModelTrainer () : + """G2P Model training wrapper class. + + Phonetisaurus G2P modeling training wrapper class. + This wraps the alignment, joint n-gram training, and ARPA to + WFST conversion steps into one command. + """ + + def __init__ (self, lexicon_file, **kwargs) : + self.lexicon_file = lexicon_file + self.model_prefix = kwargs.get ("model_prefix", "model") + self.dir_prefix = kwargs.get ("dir_prefix", "train") + self.ngram_order = kwargs.get ("ngram_order", 8) + self.seq1_max = kwargs.get ("seq1_max", 2) + self.seq2_max = kwargs.get ("seq2_max", 2) + self.seq1_del = kwargs.get ("seq1_del", False) + self.seq2_del = kwargs.get ("seq2_del", False) + self.verbose = kwargs.get ("verbose", False) + self.logger = self.setupLogger () + self.makeJointNgramCommand = self._setLMCommand ( + kwargs.get ("lm", "mitlm") + ) + + def setupLogger (self) : + """Setup the logger and logging level. + + Setup the logger and logging level. We only support + verbose and non-verbose mode. + + Args: + verbose (bool): Verbose mode, or not. + + Returns: + Logger: A configured logger instance. + """ + + level = logging.DEBUG if self.verbose else logging.INFO + logging.basicConfig ( + level=level, + format="\033[94m%(levelname)s:%(name)s:"\ + "%(asctime)s\033[0m: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + + return logging.getLogger ("phonetisaurus-train") + + def validateLexicon (self) : + """Validate the input training lexicon. + + Validate the input training lexicon. 
At present + this simply checks if the default reserved characters, + ['}', '|', '_'], are used present in the lexicon. + """ + + validator = re.compile (ur"[\}\|_]") + + with open (self.lexicon_file, "r") as ifp : + for line in ifp : + if validator.search (line.decode ("utf8")) : + error = "Bad line contains reservered character:\n\t{0}" + error = error.format (line) + raise ValueError, error + + return + + def checkPhonetisaurusConfig (self) : + """Run some basic checks before training. + + Run some basic checks regarding the $PATH, environment, + and provided data before starting training. + + Raises: + EnvironmentError: raised if binaries are not found. + """ + + self.logger.info ("Checking command configuration...") + for program in ["phonetisaurus-g2pfst", + "phonetisaurus-align", + "phonetisaurus-arpa2wfst"] : + if not self.which (program) : + raise EnvironmentError, "Phonetisaurus command, '{0}', "\ + "not found in path.".format (program) + + if not os.path.isdir (self.dir_prefix) : + self.logger.debug ("Directory does not exist. Trying to create.") + os.makedirs (self.dir_prefix) + + self.logger.info ( + "Checking lexicon for reserved characters: '}', '|', '_'..." + ) + self.validateLexicon () + + path_prefix = os.path.join (self.dir_prefix, self.model_prefix) + + self.corpus_path = u"{0}.corpus".format (path_prefix) + self.arpa_path = u"{0}.o{1}.arpa".format (path_prefix, self.ngram_order) + self.model_path = u"{0}.fst".format (path_prefix) + + for key,val in sorted (vars (self).iteritems ()) : + self.logger.debug (u"{0}: {1}".format (key, val)) + + return + + def which (self, program) : + """Basic 'which' implementation for python. 
+ + Basic 'which' implementation for python from stackoverflow: + * https://stackoverflow.com/a/377028/6739158 + """ + + def is_exe (fpath) : + return os.path.isfile (fpath) and os.access (fpath, os.X_OK) + + fpath, fname = os.path.split (program) + if fpath: + if is_exe (program): + return program + else: + for path in os.environ["PATH"].split (os.pathsep) : + path = path.strip ('"') + exe_file = os.path.join (path, program) + if is_exe (exe_file): + return exe_file + + return None + + def _setLMCommand (self, lm) : + """Configure the LM training command. + + Configure the LM training command according to the LM toolkit + selected by the user. Currently only mitlm is supported. + + Args: + lm (str): The selected command type: 'mitlm'. + + Returns: + function: The command building function for the selected toolkit. + """ + if lm == "mitlm" : + if self.which ("estimate-ngram") == None : + raise EnvironmentError, "mitlm binary 'estimate-ngram' not "\ + "found in path." + return self._mitlm + else : + raise NotImplementedError, "Only mitlm is currently supported." + + + def _mitlm (self) : + """mitlm estimate-ngram joint ngram training command. + + Build the mitlm joint ngram training command using the + estimate-ngram utility and provided arguments. + + Returns: + list: The command in subprocess list format. + """ + + command = [ + "estimate-ngram", + "-o", str (self.ngram_order), + "-t", self.corpus_path, + "-wl", self.arpa_path + ] + + self.logger.debug (u" ".join (command)) + + return command + + def makeAlignerCommand (self) : + """Build the aligner command from the provided arguments. + + Build the aligner command from the provided arguments. + + Returns: + list: The command in subprocess list format. 
+ """ + + command = [ + "phonetisaurus-align", + "--input={0}".format (self.lexicon_file), + "--ofile={0}".format (self.corpus_path), + "--seq1_del={0}".format (str (self.seq1_del).lower ()), + "--seq2_del={0}".format (str (self.seq2_del).lower ()), + "--seq1_max={0}".format (str (self.seq1_max)), + "--seq2_max={0}".format (str (self.seq2_max)) + ] + + self.logger.debug (u" ".join (command)) + + return command + + def makeARPAToWFSTCommand (self) : + """Build the ARPA to Fst conversion command. + + Build the ARPA to Fst conversion command from the provided arguments. + + Returns: + list: The command in subprocess list format. + """ + + command = [ + "phonetisaurus-arpa2wfst", + "--lm={0}".format (self.arpa_path), + "--ofile={0}".format (self.model_path) + ] + + self.logger.debug (u" ".join (command)) + + return command + + def AlignLexicon (self) : + """Align the provided input pronunciation lexicon. + + Align the provided input pronunciation lexicon according to the + provided parameters. + + Returns: + bool: True on success, False on failure. + """ + + aligner_command = self.makeAlignerCommand () + + self.logger.info ("Aligning lexicon...") + try : + if self.verbose : + subprocess.check_call (aligner_command) + else : + with open (os.devnull, "w") as devnull : + subprocess.check_call ( + aligner_command, + stderr=devnull, + stdout=devnull + ) + except subprocess.CalledProcessError : + self.logger.error ("Alignment failed. Exiting.") + sys.exit (1) + + return + + def TrainNGramModel (self) : + """Train the joint ngram model. + + Train the joint ngram model using the selected toolkit. + + Returns: + bool: True on success, False on failure. 
+ """ + joint_ngram_command = self.makeJointNgramCommand () + + self.logger.info ("Training joint ngram model...") + try : + if self.verbose : + subprocess.check_call (joint_ngram_command) + else : + with open (os.devnull, "w") as devnull : + subprocess.check_call ( + joint_ngram_command, + stderr=devnull, + stdout=devnull + ) + except subprocess.CalledProcessError : + self.logger.error ("Ngram model estimation failed. Exiting.") + sys.exit (1) + + return + + def ConvertARPAToWFST (self) : + """Convert the ARPA format joint n-gram model to Fst format. + + Convert the ARPA format joint n-gram model to an equivalent Fst + compatible with ```phonetisaurus-g2pfst```. + + Returns: + bool: True on success, False on failure. + """ + + arpa_to_fst_command = self.makeARPAToWFSTCommand () + + self.logger.info ("Converting ARPA format joint n-gram " + "model to WFST format...") + try : + if self.verbose : + subprocess.check_call (arpa_to_fst_command) + else : + with open (os.devnull, "w") as devnull : + subprocess.check_call ( + arpa_to_fst_command, + stderr=devnull, + stdout=devnull + ) + except subprocess.CalledProcessError : + self.logger.error ("ARPA to WFST conversion failed. 
Exiting.") + sys.exit (1) + + return + + def TrainG2PModel (self) : + self.checkPhonetisaurusConfig () + + self.AlignLexicon () + self.TrainNGramModel () + self.ConvertARPAToWFST () + + self.logger.info ( + "G2P training succeeded: \033[92m{0}\033[0m"\ + .format (self.model_path) + ) + + return + +if __name__ == "__main__" : + import sys, argparse + + example = "{0} --lexicon cmud.dic --seq2_del".format (sys.argv [0]) + parser = argparse.ArgumentParser (description=example) + parser.add_argument ("--lexicon", "-l", help="Training lexicon to use.", + required=True) + parser.add_argument ("--dir_prefix", "-dp", help="Output directory prefix.", + default="train") + parser.add_argument ("--model_prefix", "-mp", help="Output model prefix.", + default="model") + parser.add_argument ("--ngram_order", "-o", help="Maximum ngram order " + "for joint ngram model.", type=int, default=8) + parser.add_argument ("--seq1_del", "-s1d", help="Allow alignment deletions " + "in sequence one (graphemes).", + default=False, action="store_true") + parser.add_argument ("--seq2_del", "-s2d", help="Allow alignment deletions " + "in sequence two (phonemes).", + default=False, action="store_true") + parser.add_argument ("--seq1_max", "-s1m", help="Maximum subsequence " + "length for graphemic alignment chunks.", + type=int, default=2) + parser.add_argument ("--seq2_max", "-s2m", help="Maximum subsequence " + "length for phonemic alignment chunks.", + type=int, default=2) + parser.add_argument ("--lm", "-lm", help="LM toolkit to use.", + default="mitlm") + parser.add_argument ("--verbose", "-v", help="Verbose mode.", + default=False, action="store_true") + args = parser.parse_args () + + trainer = G2PModelTrainer (args.lexicon, **args.__dict__) + trainer.TrainG2PModel ()