#!/bin/sh
#
# Runs spell-check over a single specified file.
#
# Usage: bin/spell-check.sh my-file-to-check
#
# If you don't have aspell installed then run:
#  sudo apt-get install aspell aspell-en
# aspell's --html-skip option cannot skip specific tags selected by
# attribute, e.g. by class.
# Here we use sed to remove the following HTML from the built docs:
# - <div class="mermaid">, which contains UML diagrams etc.
# - <div class="math notranslate nohighlight">, which excludes LaTeX
# - any email addresses from 'mailto' links
# We use UK (GB) English, which is closest to NZ, but choose '-ize' over
# '-ise' (Oxford spelling), which is closer to scientific language

# Dictionary-related arguments handed to aspell. The caller may override them
# by setting ASPELL_DICT_ARGS in the environment; when it is unset or empty,
# the personal word list riskscape.txt is used.
# Other projects may need riskscape.txt plus a further word list, which can
# be done with something like:
#   ASPELL_DICT_ARGS="--personal file1.txt --extra-dicts=file2.txt"
if [ -z "${ASPELL_DICT_ARGS:-}" ]; then
  ASPELL_DICT_ARGS='--personal riskscape.txt'
fi

# Spell-check the file named by $1 and let aspell's "list" command report the
# misspelt words on stdout.
#
# Exit status: 2 when no file argument is given, 1 when the file cannot be
# read, otherwise the exit status of aspell (the last pipeline stage).

# Without this guard a missing argument would leave the pipeline reading
# from stdin, which looks like a hang when run from a terminal.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s my-file-to-check\n' "$0" >&2
  exit 2
fi

# Fail loudly on a bad path: otherwise aspell would simply receive empty
# input and the script would report success with no misspellings.
if [ ! -r "$1" ]; then
  printf '%s: cannot read file: %s\n' "$0" "$1" >&2
  exit 1
fi

# A single sed pass strips the HTML that aspell's html mode cannot skip:
#  - everything from <div class="mermaid"> to the next </div>
#  - everything from <div class="math notranslate nohighlight"> to the
#    next </div>
#  - external-reference 'mailto' links
# The file is fed via redirection so that a name starting with '-' is never
# parsed as an option. ASPELL_DICT_ARGS is deliberately left unquoted so that
# it is split into separate aspell arguments.
# shellcheck disable=SC2086
sed \
  -e '/<div class="mermaid">/,/<\/div>/d' \
  -e '/<div class="math notranslate nohighlight">/,/<\/div>/d' \
  -e 's|<a class="reference external" href="mailto.*</a>||' \
  < "$1" \
  | aspell -d en_GB-ize --home-dir="$(pwd)/docs/aspell" ${ASPELL_DICT_ARGS} \
      --mode html --add-html-skip=cite --add-html-skip=code \
      --add-html-skip=pre list
