GNUAspell · kevina · Sep 20, 2019 · Aug 8, 2019 · Aug 8, 2019 · Aug 30, 2019
diff --git a/common/basic_list.hpp b/common/basic_list.hpp
@@ -61,6 +61,8 @@ namespace acommon {
       data_.splice(data_.begin(),other.data_,cur);
       //data_.splice_after(data_.begin(), prev);
     }
+    template<class Pred>
+    void merge(BasicList & other, Pred pr) {data_.merge(other.data_, pr);}
   };
 
 }

diff --git a/common/config.cpp b/common/config.cpp
@@ -1446,6 +1446,8 @@ namespace acommon {
        N_("maximum number that can be strung together"), KEYINFO_MAY_CHANGE}
     , {"run-together-min",    KeyInfoInt,   "3",
        N_("minimal length of interior words"), KEYINFO_MAY_CHANGE}
+    , {"camel-case", KeyInfoBool,  "false",
+       N_("consider camel case words legal"), KEYINFO_MAY_CHANGE}
     , {"save-repl", KeyInfoBool  , "true",
        N_("save replacement pairs on save all")}
     , {"set-prefix", KeyInfoBool, "true",

diff --git a/common/speller.hpp b/common/speller.hpp
@@ -36,7 +36,22 @@ namespace acommon {
 
   struct CheckInfo {
     const CheckInfo * next;
-    ParmString word; // generally the root
+    // note: if 'incorrect' then word is a pointer into the string
+    // passed into check and is not null terminated;
+    // otherwise word is guaranteed to be null terminated
+    struct Word {
+      const char * str;
+      unsigned len;
+      ParmString pstr() const {return ParmString(str, len);}
+      Word() {}
+      Word(const char * str)
+        : str(str), len(strlen(str)) {}
+      Word(const char * str, unsigned len)
+        : str(str), len(len) {}
+      Word(ParmStr str)
+        : str(str.str()), len(str.size()) {}
+    };
+    Word word; // generally the root
     short pre_strip_len;
     short pre_add_len;
     const char * pre_add;
@@ -45,8 +60,9 @@ namespace acommon {
     const char * suf_add;
     short pre_flag;
     short suf_flag;
-    short guess;
-    short compound;
+    bool guess;
+    bool compound;
+    bool incorrect;
   };
 
   class Speller : public CanHaveError

diff --git a/common/suggestions.hpp b/common/suggestions.hpp
@@ -0,0 +1,66 @@
+/* This file is part of The New Aspell
+ * Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
+ * license version 2.0 or 2.1.  You should have received a copy of the
+ * LGPL license along with this library if you did not you can find it
+ * at http://www.gnu.org/.                                              */
+
+#ifndef ASPELL_SUGGESTIONS__HPP
+#define ASPELL_SUGGESTIONS__HPP
+
+#include "vector.hpp"
+#include "char_vector.hpp"
+#include "convert.hpp"
+
+namespace acommon {
+
+  class SuggestionsData {
+  public:
+    virtual void get_words(Convert *, Vector<CharVector> &) = 0;
+    virtual void get_normalized_scores(Vector<double> &) = 0;
+    virtual void get_distances(Vector<double> &) = 0;
+    virtual ~SuggestionsData() {}
+  };
+
+  class Suggestions {
+  public:
+    SuggestionsData * sugs_;
+    Convert * from_internal_;
+    Vector<CharVector> words_buffer_;
+    Vector<const char *> words_;
+    Vector<double> normalized_scores_;
+    Vector<double> distances_;
+    void reset() {
+      words_buffer_.clear();
+      words_.clear();
+      normalized_scores_.clear();
+      distances_.clear();
+    }
+    const char * * words(unsigned * len) {
+      if (words_.empty()) {
+        sugs_->get_words(from_internal_, words_buffer_);
+        words_.reserve(words_buffer_.size());
+        for (Vector<CharVector>::iterator i = words_buffer_.begin(), e = words_buffer_.end(); i != e; ++i)
+          words_.push_back(i->data());
+      }
+      if (len) *len = words_.size();
+      return words_.data();
+    }
+    double * normalized_scores(unsigned * len) {
+      if (normalized_scores_.empty()) {
+        sugs_->get_normalized_scores(normalized_scores_);
+      }
+      if (len) *len = normalized_scores_.size();
+      return normalized_scores_.data();
+    }
+    double * distances(unsigned * len) {
+      if (distances_.empty()) {
+        sugs_->get_distances(distances_);
+      }
+      if (len) *len = distances_.size();
+      return distances_.data();
+    }
+  };
+
+}
+
+#endif /* ASPELL_SUGGESTIONS__HPP */
diff --git a/manual/aspell.texi b/manual/aspell.texi
@@ -118,7 +118,7 @@ Adding Support For Other Languages
 * The Simple Soundslike::       
 * Replacement Tables::          
 * Affix Compression::           
-* Controlling the Behavior of Run-together Words::  
+* Controlling the Behavior of Compound Words::  
 * Creating A New Character Set::  
 * Creating An Official Dictionary Package::  
 
@@ -1039,6 +1039,11 @@ Typo-Analysis})
 Suggestion mode = @samp{ultra} | @samp{fast} | @samp{normal} | @samp{slow} |
 @samp{bad-spellers} (@pxref{Notes on the Different Suggestion Modes})
 
+@item sug-split-char
+@i{(list)}
+Characters to use when splitting a word into two in the suggestion list.
+Setting this option to the empty list disables word splitting.
+
 @item ignore-case
 @i{(boolean)}
 Ignore case when checking words.
@@ -1459,23 +1464,29 @@ The @option{ccpp} filter mode is a context filter that will limit
 spell checking to C/C++ comments and string literals. Any code in
 between will be left alone.
 
-@subsection Run-together Word Options
+@subsection Compound Word Options
 
-These may be used to control the behavior of run-together words (for
-more information @pxref{Controlling the Behavior of Run-together
+These may be used to control the behavior of compound words (for
+more information @pxref{Controlling the Behavior of Compound
 Words}):
+
 @table @b
 
 @item run-together,-C|-B
 @i{(boolean)}
 consider run-together words valid
+
 @item run-together-limit
 @i{(integer)}
 maximum number of words that can be strung together
 
 @item run-together-min
 @i{(integer)}
 minimal length of interior words
+
+@item camel-case
+@i{(boolean)}
+consider camelCase words valid
 @end table
 
 @subsection Miscellaneous Options
@@ -2441,9 +2452,9 @@ beginning of the line and the beginning of the misspelled word, a
 colon, another space, and a list of the suggestions separated by
 commas and spaces.
 
-If you set the option @command{run-together} and Aspell thinks this word
-is a combination of two words in the dictionary, then it prints a single
-@samp{-} in one line.
+If you set the option @option{run-together} or @option{camel-case} and
+Aspell thinks this word is a combination of two words in the
+dictionary, then it prints a single @samp{-} in one line.
 
 Finally, if the word does not appear in the dictionary, and there are
 no suggestions, then the line contains a @samp{#}, a space, the
@@ -2629,7 +2640,7 @@ need to create the language data file, and compile a new word list.
 * The Simple Soundslike::       
 * Replacement Tables::          
 * Affix Compression::           
-* Controlling the Behavior of Run-together Words::  
+* Controlling the Behavior of Compound Words::  
 * Creating A New Character Set::  
 * Creating An Official Dictionary Package::  
 @end menu
@@ -2794,7 +2805,7 @@ full composition.
 Additional options includes options to control how run-together words
 are handled the same way as they are in the normal configuration
 files.  for more information, please @ref{Controlling the Behavior of
-Run-together Words}.
+Compound Words}.
 
 @node Compiling the Word List
 @section Compiling the Word List
@@ -3379,11 +3390,12 @@ create/DG
 @noindent
 will associate the @samp{D} and @samp{G} flag with the word create.
 
-@node Controlling the Behavior of Run-together Words
-@section Controlling the Behavior of Run-together Words
+@node Controlling the Behavior of Compound Words
+@anchor{Controlling the Behavior of Run-together Words}
+@section Controlling the Behavior of Compound Words
 
-Aspell currently has support for unconditionally accepting run-together
-words.
+Aspell has support for unconditionally accepting run-together words or
+accepting camelCase words.
 
 Support for unconditionally accepting run-together words can either be
 turned on in the language data file or as a normal option via the
@@ -3397,6 +3409,16 @@ be specified in both the language data file or as a normal option.
 
 @c FIXME: Add note about compound word support when suggesting.
 
+Support for accepting camelCase words can be enabled via the
+@option{camel-case} option.  When this option is enabled Aspell will accept
+@samp{camelCase} but will reject @samp{camelcase}.  In addition,
+Aspell will attempt to provide reasonable suggestions when a camelCase
+word is misspelled.  It will correct typos such as @samp{camelcase} or
+@samp{camelcAse} and suggest @samp{camelCase}.  If a single part of a
+camelCase word is misspelled it will attempt to correct that part.
+When camelCase mode is enabled, Aspell will not attempt to split a word
+by default; to change this, simply set @option{sug-split-char}.
+
 @node Creating A New Character Set
 @section Creating A New Character Set
 
@@ -4398,6 +4420,8 @@ Add Markdown filter.
 Add new @option{wordlists} option, which is a list of UTF-8 files that
 contain additional words to accept.
 @item
+Provide support for checking camelCase words.
+@item
 When typo analysis is used, ensure that possible typos are listed
 before other suggestions.  Also fix a bug so that suggestions that split
 a word using a space or hyphen are not always first.

diff --git a/modules/speller/default/asuggest.hpp b/modules/speller/default/asuggest.hpp
@@ -36,6 +36,7 @@ namespace aspeller {
     int limit;
 
     String split_chars;
+    bool camel_case;
 
     SuggestParms() {}
 

diff --git a/modules/speller/default/language.cpp b/modules/speller/default/language.cpp
@@ -425,6 +425,38 @@ namespace aspeller {
     cur->next = 0;
     return cur;
   }
+
+  CompoundWord Language::split_word(const char * word, unsigned len,
+                                    bool camel_case) const
+  {
+    if (!camel_case || len <= 1)
+      return CompoundWord(word, word + len);
+    // len >= 2
+    if (is_upper(word[0])) {
+      if (is_lower(word[1])) {
+        unsigned i = 2;
+        while (i < len && is_lower(word[i]))
+          ++i;
+        return CompoundWord(word, word + i, word + len);
+      }
+      if (is_upper(word[1])) {
+        unsigned i = 2;
+        while (i < len && is_upper(word[i]))
+          ++i;
+        if (i == len)
+          return CompoundWord(word, word + len);
+        // The last upper case letter of the run is assumed to start the next word
+        return CompoundWord(word, word + i - 1, word + len);
+      }
+    } else if (is_lower(word[0])) {
+      unsigned i = 1;
+      while (i < len && is_lower(word[i]))
+        ++i;
+      return CompoundWord(word, word + i, word + len);
+    }
+    // this shouldn't happen but just in case...
+    return CompoundWord(word, word + len);
+  }
 
   bool SensitiveCompare::operator() (const char * word0, 
 				     const char * inlist0) const

diff --git a/modules/speller/default/language.hpp b/modules/speller/default/language.hpp
@@ -58,6 +58,28 @@ namespace aspeller {
 
   //
 
+  //
+
+  struct CompoundWord {
+    const char * word;
+    const char * sep;
+    const char * rest;
+    const char * end;
+    bool empty() const {return word == end;}
+    bool single() const {return rest == end;}
+    unsigned word_len() const {return sep - word;}
+    unsigned rest_offset() const {return rest - word;}
+    unsigned rest_len() const {return end - rest;}
+    CompoundWord()
+      : word(), sep(), rest(), end() {}
+    CompoundWord(const char * a, const char * b)
+      : word(a), sep(b), rest(b), end(b) {}
+    CompoundWord(const char * a, const char * b, const char * c)
+      : word(a), sep(b), rest(b), end(c) {}
+    CompoundWord(const char * a, const char * b, const char * c, const char * d)
+      : word(a), sep(b), rest(c), end(d) {}
+  };
+
   enum StoreAs {Stripped, Lower};
 
   class Language : public Cacheable {
@@ -336,6 +358,12 @@ namespace aspeller {
     const char * fix_case(CasePattern case_pattern, 
                           const char * str, String & buf) const;
 
+    //
+    //
+    //
+
+    CompoundWord split_word(const char * str, unsigned size, bool camel_case) const;
+
     //
     // for cache
     //