Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions common/basic_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ namespace acommon {
data_.splice(data_.begin(),other.data_,cur);
//data_.splice_after(data_.begin(), prev);
}
// Merges the elements of `other` into this list by forwarding to
// data_.merge(other.data_, pr); `pr` is the ordering predicate handed
// through unchanged.
// NOTE(review): presumably data_ follows std::list::merge semantics
// (both lists already ordered by `pr`, `other` left empty) -- confirm
// against the declaration of data_.
template<class Pred>
void merge(BasicList & other, Pred pr)
{
  data_.merge(other.data_, pr);
}
};

}
Expand Down
2 changes: 2 additions & 0 deletions common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,8 @@ namespace acommon {
N_("maximum number that can be strung together"), KEYINFO_MAY_CHANGE}
, {"run-together-min", KeyInfoInt, "3",
N_("minimal length of interior words"), KEYINFO_MAY_CHANGE}
, {"camel-case", KeyInfoBool, "false",
N_("consider camel case words legal"), KEYINFO_MAY_CHANGE}
, {"save-repl", KeyInfoBool , "true",
N_("save replacement pairs on save all")}
, {"set-prefix", KeyInfoBool, "true",
Expand Down
22 changes: 19 additions & 3 deletions common/speller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,22 @@ namespace acommon {

struct CheckInfo {
const CheckInfo * next;
ParmString word; // generally the root
// note: if 'incorrect' then word is a pointer into the string
// passed into check and is not null terminated;
// otherwise word is guaranteed to be null terminated
// A (pointer, length) view of a word.  The characters are referenced,
// not copied, and the view need not be null terminated, so always use
// `len` (or pstr()) rather than treating `str` as a C string.
struct Word {
  const char * str; // first character of the word (0 when empty)
  unsigned len;     // number of characters starting at str
  // Returns the view as a ParmString built from (str, len).
  ParmString pstr() const {return ParmString(str, len);}
  // Empty word.  Both members are zeroed so that a default-constructed
  // Word never exposes indeterminate values to pstr() or to readers of
  // str/len.
  Word() : str(0), len(0) {}
  // Wraps a null-terminated C string; the length is measured with
  // strlen.  A null pointer is accepted and yields an empty word
  // instead of being passed to strlen.
  Word(const char * str)
    : str(str), len(str ? strlen(str) : 0) {}
  // Wraps exactly `len` characters starting at `str`; no terminator is
  // required.
  Word(const char * str, unsigned len)
    : str(str), len(len) {}
  // Wraps the characters of a ParmStr, taking its str() and size().
  Word(ParmStr str)
    : str(str.str()), len(str.size()) {}
};
Word word; // generally the root
short pre_strip_len;
short pre_add_len;
const char * pre_add;
Expand All @@ -45,8 +60,9 @@ namespace acommon {
const char * suf_add;
short pre_flag;
short suf_flag;
short guess;
short compound;
bool guess;
bool compound;
bool incorrect;
};

class Speller : public CanHaveError
Expand Down
66 changes: 66 additions & 0 deletions common/suggestions.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* This file is part of The New Aspell
* Copyright (C) 2001-2002 by Kevin Atkinson under the GNU LGPL
* license version 2.0 or 2.1. You should have received a copy of the
* LGPL license along with this library if you did not you can find it
* at http://www.gnu.org/. */

#ifndef ASPELL_SUGGESTIONS__HPP
#define ASPELL_SUGGESTIONS__HPP

#include "vector.hpp"
#include "char_vector.hpp"
#include "convert.hpp"

namespace acommon {

// Abstract source of suggestion results.  Each accessor is handed a
// caller-owned vector by reference; the destructor is virtual so that
// implementations can be destroyed through a SuggestionsData pointer.
class SuggestionsData {
public:
  // Called with a Convert pointer and the vector that is to hold the
  // suggested words.
  virtual void get_words(Convert * conv, Vector<CharVector> & words) = 0;

  // Called with the vector that is to hold the normalized scores.
  virtual void get_normalized_scores(Vector<double> & scores) = 0;

  // Called with the vector that is to hold the distances.
  virtual void get_distances(Vector<double> & distances) = 0;

  virtual ~SuggestionsData() {}
};

class Suggestions {
public:
SuggestionsData * sugs_;
Convert * from_internal_;
Vector<CharVector> words_buffer_;
Vector<const char *> words_;
Vector<double> normalized_scores_;
Vector<double> distances_;
void reset() {
words_buffer_.clear();
words_.clear();
normalized_scores_.clear();
distances_.clear();
}
const char * * words(unsigned * len) {
if (words_.empty()) {
sugs_->get_words(from_internal_, words_buffer_);
words_.reserve(words_buffer_.size());
for (Vector<CharVector>::iterator i = words_buffer_.begin(), e = words_buffer_.end(); i != e; ++i)
words_.push_back(i->data());
}
if (len) *len = words_.size();
return words_.data();
}
double * normalized_scores(unsigned * len) {
if (normalized_scores_.empty()) {
sugs_->get_normalized_scores(normalized_scores_);
}
if (len) *len = normalized_scores_.size();
return normalized_scores_.data();
}
double * distances(unsigned * len) {
if (distances_.empty()) {
sugs_->get_distances(distances_);
}
if (len) *len = distances_.size();
return distances_.data();
}
};

}

#endif /* ASPELL_SUGGESTIONS__HPP */
50 changes: 37 additions & 13 deletions manual/aspell.texi
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ Adding Support For Other Languages
* The Simple Soundslike::
* Replacement Tables::
* Affix Compression::
* Controlling the Behavior of Run-together Words::
* Controlling the Behavior of Compound Words::
* Creating A New Character Set::
* Creating An Official Dictionary Package::

Expand Down Expand Up @@ -1039,6 +1039,11 @@ Typo-Analysis})
Suggestion mode = @samp{ultra} | @samp{fast} | @samp{normal} | @samp{slow} |
@samp{bad-spellers} (@pxref{Notes on the Different Suggestion Modes})

@item sug-split-char
@i{(list)}
Characters to use when splitting a word into two in the suggestion list.
Setting this option to the empty list disables word splitting.

@item ignore-case
@i{(boolean)}
Ignore case when checking words.
Expand Down Expand Up @@ -1459,23 +1464,29 @@ The @option{ccpp} filter mode is a context filter that will limit
spell checking to C/C++ comments and string literals. Any code in
between will be left alone.

@subsection Run-together Word Options
@subsection Compound Word Options

These may be used to control the behavior of run-together words (for
more information @pxref{Controlling the Behavior of Run-together
These may be used to control the behavior of compound words (for
more information @pxref{Controlling the Behavior of Compound
Words}):

@table @b

@item run-together,-C|-B
@i{(boolean)}
consider run-together words valid

@item run-together-limit
@i{(integer)}
maximum number of words that can be strung together

@item run-together-min
@i{(integer)}
minimal length of interior words

@item camel-case
@i{(boolean)}
consider camelCase words valid
@end table

@subsection Miscellaneous Options
Expand Down Expand Up @@ -2441,9 +2452,9 @@ beginning of the line and the beginning of the misspelled word, a
colon, another space, and a list of the suggestions separated by
commas and spaces.

If you set the option @command{run-together} and Aspell thinks this word
is a combination of two words in the dictionary, then it prints a single
@samp{-} in one line.
If you set the option @option{run-together} or @option{camel-case} and
Aspell thinks this word is a combination of two words in the
dictionary, then it prints a single @samp{-} in one line.

Finally, if the word does not appear in the dictionary, and there are
no suggestions, then the line contains a @samp{#}, a space, the
Expand Down Expand Up @@ -2629,7 +2640,7 @@ need to create the language data file, and compile a new word list.
* The Simple Soundslike::
* Replacement Tables::
* Affix Compression::
* Controlling the Behavior of Run-together Words::
* Controlling the Behavior of Compound Words::
* Creating A New Character Set::
* Creating An Official Dictionary Package::
@end menu
Expand Down Expand Up @@ -2794,7 +2805,7 @@ full composition.
Additional options include options to control how run-together words
are handled the same way as they are in the normal configuration
files. For more information, please see @ref{Controlling the Behavior of
Run-together Words}.
Compound Words}.

@node Compiling the Word List
@section Compiling the Word List
Expand Down Expand Up @@ -3379,11 +3390,12 @@ create/DG
@noindent
will associate the @samp{D} and @samp{G} flag with the word create.

@node Controlling the Behavior of Run-together Words
@section Controlling the Behavior of Run-together Words
@node Controlling the Behavior of Compound Words
@anchor{Controlling the Behavior of Run-together Words}
@section Controlling the Behavior of Compound Words

Aspell currently has support for unconditionally accepting run-together
words.
Aspell has support for unconditionally accepting run-together words or
accepting camelCase words.

Support for unconditionally accepting run-together words can either be
turned on in the language data file or as a normal option via the
Expand All @@ -3397,6 +3409,16 @@ be specified in both the language data file or as a normal option.

@c FIXME: Add note about compound word support when suggesting.

Support for accepting camelCase words can be enabled via the
@option{camel-case} option. When this option is enabled Aspell will accept
@samp{camelCase} but will reject @samp{camelcase}. In addition,
Aspell will attempt to provide reasonable suggestions when a camelCase
word is misspelled. It will correct typos such as @samp{camelcase} or
@samp{camelcAse} and suggest @samp{camelCase}. If a single part of a
camelCase word is misspelled it will attempt to correct that part.
When camelCase mode is enabled Aspell will not attempt to split a word
by default; to change this, simply set @option{sug-split-char}.

@node Creating A New Character Set
@section Creating A New Character Set

Expand Down Expand Up @@ -4398,6 +4420,8 @@ Add Markdown filter.
Add new @option{wordlists} option, which is a list of UTF-8 files that
contain additional words to accept.
@item
Provide support for checking camelCase words.
@item
When typo analysis is used, ensure that possible typos are listed
before other suggestions. Also fix a bug so that suggestions that split
a word using a space or hyphen are not always first.
Expand Down
1 change: 1 addition & 0 deletions modules/speller/default/asuggest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ namespace aspeller {
int limit;

String split_chars;
bool camel_case;

SuggestParms() {}

Expand Down
32 changes: 32 additions & 0 deletions modules/speller/default/language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,38 @@ namespace aspeller {
cur->next = 0;
return cur;
}

// Splits the first camelCase component off `word` (which is `len`
// characters long and need not be null terminated).
//
// The result always starts at `word` and ends at `word + len`.  It is a
// single, unsplit word when `camel_case` is false, when `len` <= 1, when
// the word is entirely upper case, or when the leading characters fit
// none of the patterns below.  Otherwise the first part is:
//   - a run of lower case letters                      ("camel" + "Case")
//   - one upper case letter followed by lower case     ("Camel" + "Case")
//   - a run of upper case letters minus its last one   ("HTML"  + "Parser")
CompoundWord Language::split_word(const char * word, unsigned len,
                                  bool camel_case) const
{
  const char * const stop = word + len;

  // Only camelCase mode splits, and at least two characters are needed.
  if (camel_case && len > 1) {
    const char * cur = word + 1;

    if (is_upper(word[0])) {
      if (is_lower(*cur)) {
        // Capitalised part: swallow the lower case tail.
        ++cur;
        while (cur != stop && is_lower(*cur))
          ++cur;
        return CompoundWord(word, cur, stop);
      }
      if (is_upper(*cur)) {
        // Run of capitals.
        ++cur;
        while (cur != stop && is_upper(*cur))
          ++cur;
        if (cur == stop)
          return CompoundWord(word, stop);
        // The final capital of the run is taken to start the next word.
        return CompoundWord(word, cur - 1, stop);
      }
    } else if (is_lower(word[0])) {
      // Lower case part: runs up to the first non lower case character.
      while (cur != stop && is_lower(*cur))
        ++cur;
      return CompoundWord(word, cur, stop);
    }
  }

  // Either splitting is disabled / impossible, or the leading characters
  // matched no pattern (this shouldn't happen, but just in case...).
  return CompoundWord(word, stop);
}

bool SensitiveCompare::operator() (const char * word0,
const char * inlist0) const
Expand Down
28 changes: 28 additions & 0 deletions modules/speller/default/language.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ namespace aspeller {

//

//

// Four pointers into one character buffer describing a word that has
// been divided into a first part and a remainder.  Nothing is owned or
// copied; all lengths are plain pointer differences.
struct CompoundWord {
  const char * word; // start of the first part
  const char * sep;  // one past the end of the first part
  const char * rest; // start of the remainder
  const char * end;  // one past the end of the whole word

  // Null word: every pointer is zero, so empty() and single() hold.
  CompoundWord()
    : word(0), sep(0), rest(0), end(0) {}

  // Unsplit word [a, b): there is no remainder.
  CompoundWord(const char * a, const char * b)
    : word(a), sep(b), rest(b), end(b) {}

  // First part [a, b) directly followed by the remainder [b, c).
  CompoundWord(const char * a, const char * b, const char * c)
    : word(a), sep(b), rest(b), end(c) {}

  // First part [a, b) and remainder [c, d); [b, c) lies between them.
  CompoundWord(const char * a, const char * b, const char * c, const char * d)
    : word(a), sep(b), rest(c), end(d) {}

  // True when the whole word has zero length.
  bool empty() const {return end == word;}

  // True when there is no remainder after the first part.
  bool single() const {return end == rest;}

  // Length of the first part.
  unsigned word_len() const {return static_cast<unsigned>(sep - word);}

  // Offset of the remainder from the start of the word.
  unsigned rest_offset() const {return static_cast<unsigned>(rest - word);}

  // Length of the remainder.
  unsigned rest_len() const {return static_cast<unsigned>(end - rest);}
};

// NOTE(review): presumably selects the normalised form in which words
// are stored -- confirm against the users of StoreAs.
enum StoreAs {
  Stripped,
  Lower
};

class Language : public Cacheable {
Expand Down Expand Up @@ -336,6 +358,12 @@ namespace aspeller {
const char * fix_case(CasePattern case_pattern,
const char * str, String & buf) const;

//
// word splitting
//

// Returns a CompoundWord describing how the `size` characters starting
// at `str` divide into a first part and a remainder.  A split is only
// attempted when `camel_case` is true.
CompoundWord split_word(const char * str, unsigned size, bool camel_case) const;

//
// for cache
//
Expand Down
Loading