Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 57 additions & 25 deletions fonts.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,77 @@ import (
"fmt"
"image"
"io/ioutil"
"math"
"net/url"
"os"
"strings"
)

type fontSymbol struct {
symbol string
image *imageBinary
width int
height int
type FontSymbol struct {
symbol string
image *imageBinary
width int
height int
advance int
}

func newFontSymbol(symbol string, img image.Image) *fontSymbol {
imgBin := newImageBinary(img)
fs := &fontSymbol{
symbol: symbol,
image: imgBin,
width: imgBin.width,
height: imgBin.height,
// NewFontSymbolRune creates a new symbol whose text is the single rune symbol.
// opts may be nil, in which case NewFontSymbolOpts applies its defaults.
func NewFontSymbolRune(symbol rune, img image.Image, opts *NewFontSymbolOptions) *FontSymbol {
	text := string(symbol)
	return NewFontSymbolOpts(text, img, opts)
}

// NewFontSymbol creates a new symbol with the text symbol from img, using the
// default options (it calls NewFontSymbolOpts with nil options).
func NewFontSymbol(symbol string, img image.Image) *FontSymbol {
return NewFontSymbolOpts(symbol, img, nil)
}

// NewFontSymbolOpts creates a new symbol with the text symbol from img.
// The image is passed through ensureGrayScale before being converted to its
// binary form, and the symbol's width and height are taken from that result.
//
// opts may be nil; the advance is then left at the math.MaxInt sentinel, which
// makes Advance fall back to the symbol width. When opts is non-nil its Advance
// value is used as is. Use NewFontSymbol for the default options.
func NewFontSymbolOpts(symbol string, img image.Image, opts *NewFontSymbolOptions) *FontSymbol {
	bin := newImageBinary(ensureGrayScale(img))

	sym := &FontSymbol{
		symbol:  symbol,
		image:   bin,
		width:   bin.width,
		height:  bin.height,
		advance: math.MaxInt, // sentinel: no explicit advance configured
	}
	if opts != nil {
		sym.advance = opts.Advance
	}
	return sym
}

func (f *fontSymbol) String() string { return f.symbol }
// Advance returns the horizontal advance of the symbol: the explicitly
// configured advance if there is one, otherwise the symbol width. An advance
// equal to math.MaxInt is the sentinel for "not configured".
func (f FontSymbol) Advance() int {
	if f.advance != math.MaxInt {
		return f.advance
	}
	return f.width
}

// String returns the text this symbol stands for, so a *FontSymbol satisfies fmt.Stringer.
func (f *FontSymbol) String() string { return f.symbol }

// NewFontSymbolOptions holds the optional settings accepted by
// NewFontSymbolOpts and NewFontSymbolRune.
type NewFontSymbolOptions struct {
// Advance is the advance of the symbol, taken into account when recognizing text.
// This allows symbols to be closer to/further away from each other than the
// width of the symbol.
// It is ignored when set to math.MaxInt; the symbol width is used instead.
// Note that the zero value (0) is NOT treated as unset: it is stored as an
// advance of 0.
Advance int
}

type fontSymbolLookup struct {
fs *fontSymbol
fs *FontSymbol
x, y int
g float64
size int
}

func newFontSymbolLookup(fs *fontSymbol, x, y int, g float64) *fontSymbolLookup {
func newFontSymbolLookup(fs *FontSymbol, x, y int, g float64) *fontSymbolLookup {
return &fontSymbolLookup{fs, x, y, g, fs.image.size}
}

func (l *fontSymbolLookup) cross(f *fontSymbolLookup) bool {
r := image.Rect(l.x, l.y, l.x+l.fs.width, l.y+l.fs.height)
r2 := image.Rect(f.x, f.y, f.x+f.fs.width, f.y+f.fs.height)
r := image.Rect(l.x, l.y, l.x+l.fs.Advance(), l.y+l.fs.height)
r2 := image.Rect(f.x, f.y, f.x+f.fs.Advance(), f.y+f.fs.height)

return r.Intersect(r2) != image.Rectangle{}
}
Expand Down Expand Up @@ -91,13 +122,13 @@ func (l *fontSymbolLookup) String() string {
return fmt.Sprintf("'%s'(%d,%d,%d)[%f]", l.fs.symbol, l.x, l.y, l.size, l.g)
}

func loadFont(path string) ([]*fontSymbol, error) {
func loadFont(path string) ([]*FontSymbol, error) {
files, err := ioutil.ReadDir(path)
if err != nil {
return nil, err
}

fonts := make([]*fontSymbol, 0)
fonts := make([]*FontSymbol, 0)
for _, f := range files {
if f.IsDir() || strings.HasPrefix(f.Name(), ".") {
continue
Expand All @@ -106,12 +137,12 @@ func loadFont(path string) ([]*fontSymbol, error) {
if err != nil {
return nil, err
}
fonts = append(fonts,fs)
fonts = append(fonts, fs)
}
return fonts, nil
}

func loadSymbol(path string, fileName string) (*fontSymbol, error) {
func loadSymbol(path string, fileName string) (*FontSymbol, error) {
imageFile, err := os.Open(path + "/" + fileName)
if err != nil {
return nil, err
Expand All @@ -123,15 +154,16 @@ func loadSymbol(path string, fileName string) (*fontSymbol, error) {
return nil, err
}

symbolName, err := url.QueryUnescape(fileName)
nameWithoutExtension := strings.TrimSuffix(fileName, ".png")
symbolName, err := url.QueryUnescape(nameWithoutExtension)
if err != nil {
return nil, err
}

symbolName = strings.Replace(symbolName, "\u200b", "", -1) // Remove zero width spaces
fs := newFontSymbol(
strings.TrimSuffix(symbolName, ".png"),
ensureGrayScale(img),
fs := NewFontSymbol(
symbolName,
img,
)
return fs, nil
}
4 changes: 2 additions & 2 deletions fonts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import (
)

func TestFontSymbol(t *testing.T) {
Convey("When I create a fontSymbol from a image dile", t, func() {
Convey("When I create a fontSymbol from a image file", t, func() {
img := loadImageGray("testdata/font_1/0.png")
fs := newFontSymbol("0", img)
fs := NewFontSymbol("0", img)
Convey("It loads the image as a imageBinary", func() {
So(fs.image, ShouldHaveSameTypeAs, &imageBinary{})
So(fs.image.width, ShouldEqual, img.Bounds().Max.X)
Expand Down
54 changes: 26 additions & 28 deletions ocr.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ import (
// too much). To do so add unicode ZERO WIDTH SPACE symbol (%E2%80%8B) to the filename.
// Ex: %2F%E2%80%8B.png will produce '/' symbol as well.
type OCR struct {
fontFamilies map[string][]*fontSymbol
fontFamilies map[string][]*FontSymbol
threshold float64
allSymbols []*fontSymbol
allSymbols []*FontSymbol
numThreads int
}

Expand All @@ -31,7 +31,7 @@ type OCR struct {
// only one thread
func NewOCR(threshold float64, numThreads ...int) *OCR {
ocr := &OCR{
fontFamilies: make(map[string][]*fontSymbol),
fontFamilies: make(map[string][]*FontSymbol),
threshold: threshold,
numThreads: 1,
}
Expand All @@ -43,6 +43,22 @@ func NewOCR(threshold float64, numThreads ...int) *OCR {
return ocr
}

// AddFontFamily adds symbols to the font family called name and also registers
// them in the global symbol list through AddSymbols.
//
// Adding to an existing family is allowed: the new symbols are appended to it.
// No checks are done to avoid duplicated symbols.
func (o *OCR) AddFontFamily(name string, symbols ...*FontSymbol) {
	if o.fontFamilies == nil {
		// An OCR that was not created with NewOCR has a nil map, and writing
		// to a nil map panics. Create it lazily so the zero value is usable.
		o.fontFamilies = make(map[string][]*FontSymbol)
	}
	o.fontFamilies[name] = append(o.fontFamilies[name], symbols...)

	o.AddSymbols(symbols...)
}

// AddSymbols adds symbols that are not associated with a specific font family.
// They are appended to the list of all known symbols; no checks are done to
// avoid duplicates.
func (o *OCR) AddSymbols(symbols ...*FontSymbol) {
o.allSymbols = append(o.allSymbols, symbols...)
}

// LoadFont loads a specific fontset from the given folder. Fonts are simple image files
// containing a PNG/JPEG of the font, and named after the "letter" represented by the image.
//
Expand All @@ -52,33 +68,16 @@ func (o *OCR) LoadFont(fontPath string) error {
return err
}

fontFamily, err := loadFont(fontPath)
symbols, err := loadFont(fontPath)
if err != nil {
return err
}

familyName := filepath.Base(fontPath)
family, ok := o.fontFamilies[familyName]
if !ok {
family = make([]*fontSymbol, 0, len(fontFamily))
}

family = append(family, fontFamily...)
o.fontFamilies[familyName] = family

o.updateAllSymbols()
o.AddFontFamily(familyName, symbols...)
return nil
}

// updateAllSymbols rebuilds o.allSymbols from scratch as the concatenation of
// every family in o.fontFamilies, discarding whatever the list held before.
// Families are visited in map iteration order, so the relative order of
// families in the result is unspecified. The list is nil when there are no
// symbols at all.
func (o *OCR) updateAllSymbols() {
	total := 0
	for _, family := range o.fontFamilies {
		total += len(family)
	}

	var all []*FontSymbol
	if total > 0 {
		// The final size is known, so allocate once instead of growing.
		all = make([]*FontSymbol, 0, total)
	}
	for _, family := range o.fontFamilies {
		all = append(all, family...)
	}
	o.allSymbols = all
}

// Recognize the text in the image using the fontsets previously loaded. If a SubImage
// is received, the search will be limited by the boundaries of the SubImage
func (o *OCR) Recognize(img image.Image) (string, error) {
Expand Down Expand Up @@ -132,15 +131,14 @@ func (o *OCR) filterAndArrange(all []*fontSymbolLookup) string {

var str strings.Builder
x := all[0].x
cx := 0
previousAdvance := 0
for i, s := range all {
maxCX := max(cx, s.fs.width)

// If the distance between the end of the previous symbol and the beginning of the
// current one is larger than a char size, then it is a space.
// This should not be applied at the beginning (i == 0), as a first symbol whose
// s.x is larger than that size would get a (useless) whitespace in front.
if s.x-x >= maxCX && i != 0 {
maxCurrentPreviousAdvance := max(previousAdvance, s.fs.Advance())
if s.x-x >= maxCurrentPreviousAdvance && i != 0 {
str.WriteString(" ")
}

Expand All @@ -149,8 +147,8 @@ func (o *OCR) filterAndArrange(all []*fontSymbolLookup) string {
str.WriteString("\n")
}

x = s.x + s.fs.width
cx = s.fs.width
x = s.x + s.fs.Advance()
previousAdvance = s.fs.Advance()
str.WriteString(s.fs.symbol)
}

Expand Down
10 changes: 5 additions & 5 deletions ocr_parallel_find.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
)

// Search for all symbols in the image in parallel. Uses a Fan-out/fan-in approach.
func findAllInParallel(numWorkers int, symbols []*fontSymbol, img *imageBinary, threshold float64, rect image.Rectangle) ([]*fontSymbolLookup, error) {
func findAllInParallel(numWorkers int, symbols []*FontSymbol, img *imageBinary, threshold float64, rect image.Rectangle) ([]*fontSymbolLookup, error) {
f := &parallelFinder{
numWorkers: max(numWorkers, 1),
symbols: symbols,
Expand All @@ -21,7 +21,7 @@ type parallelFinder struct {
img *imageBinary
threshold float64
numWorkers int
symbols []*fontSymbol
symbols []*FontSymbol
rect image.Rectangle
}

Expand All @@ -30,8 +30,8 @@ type lookupResult struct {
err error
}

func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *fontSymbol {
out := make(chan *fontSymbol)
func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *FontSymbol {
out := make(chan *FontSymbol)
go func() {
defer close(out)
for _, s := range f.symbols {
Expand All @@ -45,7 +45,7 @@ func (f *parallelFinder) prepare(done <-chan struct{}) <-chan *fontSymbol {
return out
}

func (f *parallelFinder) addWorker(done <-chan struct{}, in <-chan *fontSymbol) <-chan lookupResult {
func (f *parallelFinder) addWorker(done <-chan struct{}, in <-chan *FontSymbol) <-chan lookupResult {
out := make(chan lookupResult)
go func() {
defer close(out)
Expand Down