diff --git a/fonts.go b/fonts.go index 0e198c9..508edd1 100644 --- a/fonts.go +++ b/fonts.go @@ -4,46 +4,77 @@ import ( "fmt" "image" "io/ioutil" + "math" "net/url" "os" "strings" ) -type fontSymbol struct { - symbol string - image *imageBinary - width int - height int +type FontSymbol struct { + symbol string + image *imageBinary + width int + height int + advance int } -func newFontSymbol(symbol string, img image.Image) *fontSymbol { - imgBin := newImageBinary(img) - fs := &fontSymbol{ - symbol: symbol, - image: imgBin, - width: imgBin.width, - height: imgBin.height, +// NewFontSymbolRune creates a new symbol for a rune. opts is optional; pass nil to use the defaults. +func NewFontSymbolRune(symbol rune, img image.Image, opts *NewFontSymbolOptions) *FontSymbol { + return NewFontSymbolOpts(string([]rune{symbol}), img, opts) +} + +func NewFontSymbol(symbol string, img image.Image) *FontSymbol { + return NewFontSymbolOpts(symbol, img, nil) +} + +// NewFontSymbolOpts creates a new symbol for a string. Use NewFontSymbol to use the default options. +func NewFontSymbolOpts(symbol string, img image.Image, opts *NewFontSymbolOptions) *FontSymbol { + imgBin := newImageBinary(ensureGrayScale(img)) + advance := math.MaxInt + if opts != nil { + advance = opts.Advance + } + fs := &FontSymbol{ + symbol: symbol, + image: imgBin, + width: imgBin.width, + height: imgBin.height, + advance: advance, } return fs } -func (f *fontSymbol) String() string { return f.symbol } +func (f FontSymbol) Advance() int { + if f.advance == math.MaxInt { + return f.width + } + return f.advance +} + +func (f *FontSymbol) String() string { return f.symbol } + +type NewFontSymbolOptions struct { + // The advance of the symbol, taken into account when recognizing text. + // This allows symbols to be closer/further away than the width of the symbol. 
+ // Is ignored when set to math.MaxInt + Advance int +} type fontSymbolLookup struct { - fs *fontSymbol + fs *FontSymbol x, y int g float64 size int } -func newFontSymbolLookup(fs *fontSymbol, x, y int, g float64) *fontSymbolLookup { +func newFontSymbolLookup(fs *FontSymbol, x, y int, g float64) *fontSymbolLookup { return &fontSymbolLookup{fs, x, y, g, fs.image.size} } func (l *fontSymbolLookup) cross(f *fontSymbolLookup) bool { - r := image.Rect(l.x, l.y, l.x+l.fs.width, l.y+l.fs.height) - r2 := image.Rect(f.x, f.y, f.x+f.fs.width, f.y+f.fs.height) + r := image.Rect(l.x, l.y, l.x+l.fs.Advance(), l.y+l.fs.height) + r2 := image.Rect(f.x, f.y, f.x+f.fs.Advance(), f.y+f.fs.height) return r.Intersect(r2) != image.Rectangle{} } @@ -91,13 +122,13 @@ func (l *fontSymbolLookup) String() string { return fmt.Sprintf("'%s'(%d,%d,%d)[%f]", l.fs.symbol, l.x, l.y, l.size, l.g) } -func loadFont(path string) ([]*fontSymbol, error) { +func loadFont(path string) ([]*FontSymbol, error) { files, err := ioutil.ReadDir(path) if err != nil { return nil, err } - fonts := make([]*fontSymbol, 0) + fonts := make([]*FontSymbol, 0) for _, f := range files { if f.IsDir() || strings.HasPrefix(f.Name(), ".") { continue @@ -106,12 +137,12 @@ func loadFont(path string) ([]*fontSymbol, error) { if err != nil { return nil, err } - fonts = append(fonts,fs) + fonts = append(fonts, fs) } return fonts, nil } -func loadSymbol(path string, fileName string) (*fontSymbol, error) { +func loadSymbol(path string, fileName string) (*FontSymbol, error) { imageFile, err := os.Open(path + "/" + fileName) if err != nil { return nil, err @@ -123,15 +154,16 @@ func loadSymbol(path string, fileName string) (*fontSymbol, error) { return nil, err } - symbolName, err := url.QueryUnescape(fileName) + nameWithoutExtension := strings.TrimSuffix(fileName, ".png") + symbolName, err := url.QueryUnescape(nameWithoutExtension) if err != nil { return nil, err } symbolName = strings.Replace(symbolName, "\u200b", "", -1) // Remove 
zero width spaces - fs := newFontSymbol( - strings.TrimSuffix(symbolName, ".png"), - ensureGrayScale(img), + fs := NewFontSymbol( + symbolName, + img, ) return fs, nil } diff --git a/fonts_test.go b/fonts_test.go index f650d24..d59aaf0 100644 --- a/fonts_test.go +++ b/fonts_test.go @@ -8,9 +8,9 @@ import ( ) func TestFontSymbol(t *testing.T) { - Convey("When I create a fontSymbol from a image dile", t, func() { + Convey("When I create a fontSymbol from a image file", t, func() { img := loadImageGray("testdata/font_1/0.png") - fs := newFontSymbol("0", img) + fs := NewFontSymbol("0", img) Convey("It loads the image as a imageBinary", func() { So(fs.image, ShouldHaveSameTypeAs, &imageBinary{}) So(fs.image.width, ShouldEqual, img.Bounds().Max.X) diff --git a/ocr.go b/ocr.go index e01301b..f8e2cdd 100644 --- a/ocr.go +++ b/ocr.go @@ -19,9 +19,9 @@ import ( // too much). To do so add unicode ZERO WIDTH SPACE symbol (%E2%80%8B) to the filename. // Ex: %2F%E2%80%8B.png will produce '/' symbol as well. type OCR struct { - fontFamilies map[string][]*fontSymbol + fontFamilies map[string][]*FontSymbol threshold float64 - allSymbols []*fontSymbol + allSymbols []*FontSymbol numThreads int } @@ -31,7 +31,7 @@ type OCR struct { // only one thread func NewOCR(threshold float64, numThreads ...int) *OCR { ocr := &OCR{ - fontFamilies: make(map[string][]*fontSymbol), + fontFamilies: make(map[string][]*FontSymbol), threshold: threshold, numThreads: 1, } @@ -43,6 +43,22 @@ func NewOCR(threshold float64, numThreads ...int) *OCR { return ocr } +// Adds symbols associated to a certain font family. +// Allows adding to an existing family (no checks are done to avoid duplicated symbols). +func (o *OCR) AddFontFamily(name string, symbols ...*FontSymbol) { + family := o.fontFamilies[name] + family = append(family, symbols...) + + o.fontFamilies[name] = family + + o.AddSymbols(symbols...) +} + +// Adds symbols not associated to a specific font family. 
+func (o *OCR) AddSymbols(symbols ...*FontSymbol) { + o.allSymbols = append(o.allSymbols, symbols...) +} + // LoadFont loads a specific fontset from the given folder. Fonts are simple image files // containing a PNG/JPEG of the font, and named after the "letter" represented by the image. // @@ -52,33 +68,16 @@ func (o *OCR) LoadFont(fontPath string) error { return err } - fontFamily, err := loadFont(fontPath) + symbols, err := loadFont(fontPath) if err != nil { return err } familyName := filepath.Base(fontPath) - family, ok := o.fontFamilies[familyName] - if !ok { - family = make([]*fontSymbol, 0, len(fontFamily)) - } - - family = append(family, fontFamily...) - o.fontFamilies[familyName] = family - - o.updateAllSymbols() + o.AddFontFamily(familyName, symbols...) return nil } -func (o *OCR) updateAllSymbols() { - total := 0 - o.allSymbols = nil - for _, family := range o.fontFamilies { - total += len(family) - o.allSymbols = append(o.allSymbols, family...) - } -} - // Recognize the text in the image using the fontsets previously loaded. 
If a SubImage // is received, the search will be limited by the boundaries of the SubImage func (o *OCR) Recognize(img image.Image) (string, error) { @@ -132,15 +131,14 @@ func (o *OCR) filterAndArrange(all []*fontSymbolLookup) string { var str strings.Builder x := all[0].x - cx := 0 + previousAdvance := 0 for i, s := range all { - maxCX := max(cx, s.fs.width) - // if distance between end of previous symbol and beginning of the // current is larger then a char size, then it is a space // This should not be applied in the beginning (i == 0) as it would put a white space for // any s.x > maxCX will have a (useless) whitespace in front - if s.x-x >= maxCX && i != 0 { + maxCurrentPreviousAdvance := max(previousAdvance, s.fs.Advance()) + if s.x-x >= maxCurrentPreviousAdvance && i != 0 { str.WriteString(" ") } @@ -149,8 +147,8 @@ func (o *OCR) filterAndArrange(all []*fontSymbolLookup) string { str.WriteString("\n") } - x = s.x + s.fs.width - cx = s.fs.width + x = s.x + s.fs.Advance() + previousAdvance = s.fs.Advance() str.WriteString(s.fs.symbol) } diff --git a/ocr_parallel_find.go b/ocr_parallel_find.go index 3e7a4d2..e0377ed 100644 --- a/ocr_parallel_find.go +++ b/ocr_parallel_find.go @@ -6,7 +6,7 @@ import ( ) // Search for all symbols in the image in parallel. Uses a Fan-out/fan-in approach. 
-func findAllInParallel(numWorkers int, symbols []*fontSymbol, img *imageBinary, threshold float64, rect image.Rectangle) ([]*fontSymbolLookup, error) { +func findAllInParallel(numWorkers int, symbols []*FontSymbol, img *imageBinary, threshold float64, rect image.Rectangle) ([]*fontSymbolLookup, error) { f := &parallelFinder{ numWorkers: max(numWorkers, 1), symbols: symbols, @@ -21,7 +21,7 @@ type parallelFinder struct { img *imageBinary threshold float64 numWorkers int - symbols []*fontSymbol + symbols []*FontSymbol rect image.Rectangle } @@ -30,8 +30,8 @@ type lookupResult struct { err error } -func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *fontSymbol { - out := make(chan *fontSymbol) +func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *FontSymbol { + out := make(chan *FontSymbol) go func() { defer close(out) for _, s := range f.symbols { @@ -45,7 +45,7 @@ func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *fontSymbol { return out } -func (f *parallelFinder) addWorker(done <-chan struct{}, in <-chan *fontSymbol) <-chan lookupResult { +func (f *parallelFinder) addWorker(done <-chan struct{}, in <-chan *FontSymbol) <-chan lookupResult { out := make(chan lookupResult) go func() { defer close(out)