diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..cb8f3ca --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/djvulibre"] + path = src/djvulibre + url = https://github.com/barak/djvulibre diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/Makefile b/Makefile index 30e0a95..48443bd 100644 --- a/Makefile +++ b/Makefile @@ -17,31 +17,44 @@ #You should have received a copy of the GNU General Public License #along with Simpledjvu. If not, see <http://www.gnu.org/licenses/>. - -DJVULIBRE_PATH = /home/mihaild/djvulibre -CC = g++ -O3 -std=c++0x -MMD -INCLUDES=-I$(DJVULIBRE_PATH) -I$(DJVULIBRE_PATH)/libdjvu -I$(DJVULIBRE_PATH)/tools -I. -CXXFLAGS=$(INCLUDES) -DHAVE_CONFIG_H -pthread -DTHREADMODEL=POSIXTHREADS -LINK=g++ -O3 - -BIN_FILES = simpledjvu get_pgm_diff - -all: $(BIN_FILES) - -simpledjvu: build/simpledjvu.o build/hystogram_splitter.o build/normalize.o build/pgm2jb2.o build/jb2tune.o jb2cmp/libjb2cmp.a - $(LINK) -o simpledjvu $^ -DHAVE_CONFIG_H -ldjvulibre - -get_pgm_diff: build/get_pgm_diff.o - $(LINK) -o get_pgm_diff $^ -DHAVE_CONFIG_H -ldjvulibre - -build/%.o build/%.d: %.cpp - $(CC) $(CXXFLAGS) -c -o build/$*.o $*.cpp - -jb2cmp/libjb2cmp.a: - cd jb2cmp && ${MAKE} +PROJECT = simpledjvu +DJVULIBRE_PATH = src/djvulibre +CXX = g++ -O3 -std=c++0x +INCLUDES = -I$(DJVULIBRE_PATH) -I$(DJVULIBRE_PATH)/libdjvu -I$(DJVULIBRE_PATH)/tools -Isrc +CXXFLAGS = $(INCLUDES) -DHAVE_CONFIG_H -pthread -DTHREADMODEL=POSIXTHREADS +LDFLAGS = -ldjvulibre +LN = $(CXX) -DHAVE_CONFIG_H +RM = rm -f + +OBJ_FILES = \ + src/hystogram_splitter.o \ + src/normalize.o \ + src/pgm2jb2.o \ + src/djvulibre/tools/jb2tune.o \ + src/djvulibre/tools/jb2cmp/classify.o \ + src/djvulibre/tools/jb2cmp/cuts.o \ + src/djvulibre/tools/jb2cmp/frames.o \ + src/djvulibre/tools/jb2cmp/patterns.o \ + src/simpledjvu.o +OBJ_FILE_PGM = src/get_pgm_diff.o +BIN_FILES = $(PROJECT) get_pgm_diff + +all: djvulibre_config $(BIN_FILES) + +$(PROJECT): $(OBJ_FILES) + $(LN) $^ $(LDFLAGS) -o $@ + +get_pgm_diff: $(OBJ_FILE_PGM) + $(LN) $^ $(LDFLAGS) -o $@ + +%.o: %.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +djvulibre_config: + cd src/djvulibre && ./autogen.sh clean: - rm -f build/* $(BIN_FILES) - cd jb2cmp && ${MAKE} clean - --include $(wildcard build/*.d) + $(RM) $(OBJ_FILES) 
$(OBJ_FILE_PGM) $(BIN_FILES) + +# Targets that do not produce a file of the same name. +.PHONY: all clean djvulibre_config diff --git a/README.md b/README.md index d536356..6799dc5 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,33 @@ It uses [djvulibre](http://djvu.sourceforge.net/) for all technichal work and co Really, the only one thing that it does itself is splitting the image to mask, background and foreground. ## Install -I am too stupid to understand, how does autoconf and other such tools work, so you have to change Makefile manually. -In usual case, set DJVULIBRE_PATH to right value is enough. -May be, you will need to change CXXFLAGS - just look with which flags djvulibre compiles on your machine and copy them. +I am too stupid to understand how autoconf and other such tools work, so you have to change the `Makefile` manually. + +### Load submodules + +Submodules: + +- [djvulibre](https://github.com/barak/djvulibre) -> [src/djvulibre](src/djvulibre) + +```shell +$ git submodule init +$ git submodule update +``` + +### Build After it, just say `make` in project directory. If you want, you can change your PATH variable or copy **simpledjvu** binary to any directory already included in your PATH. -You need g++ version supports c++0x standard flag. +You need a `g++` version that supports the `-std=c++0x` flag. ## Usage -`simpledjvu [options] **input.pgm** **output.djvu**` + +```shell +simpledjvu [options] input.pgm output.djvu +``` where options = @@ -27,6 +41,8 @@ where options = **-mask_mul n** Multiplicate mask size n times. +**-dpi n** DPI of the output djvu file. + **-use_normalized** Use normalized image (in which "almost black" and "almost white" colors are exactly black and white) for background and foreground except of original. **-normalize_iters n** Use *n* normalization iterations for mask (see "Algorithm description"). @@ -39,9 +55,10 @@ where options = **-slices_fg n1,n2,...** Use *n1,n2,...* as number of slices for c44 for foreground. -You can use imagemagick or any other similar tool to obtain pgm from other format. 
+You can use [Netpbm](https://sourceforge.net/projects/netpbm/) or any other similar tool to obtain a `pgm` file from another format. ## Algorithm description. + Algorithm is very simple, but it gives surprisingly good result for non-pathological images. As usual, background and foreground are really white and black parts, not paper and letters. @@ -51,4 +68,4 @@ Increase this images to original image size, and change the image: make absolute Now we repeat this step many times, and result converges to almost black-and-white image, so we can do simple threshold to obtain the mask. -We use c44 for partially masked images using blurred mask for background, and blurred inverted mask for foreground. +We use `c44` for partially masked images, using a blurred mask for the background and a blurred inverted mask for the foreground. diff --git a/jb2cmp/Makefile b/jb2cmp/Makefile deleted file mode 100644 index 1c1d936..0000000 --- a/jb2cmp/Makefile +++ /dev/null @@ -1,109 +0,0 @@ -#C- This program is free software; you can redistribute it and/or -#C- modify it under the terms of the GNU General Public License, -#C- either Version 2 of the License or (at your option) any later -#C- version. The license should have accompanied the program -#C- or you may obtain a copy of the license from the Free Software -#C- Foundation at http://www.fsf.org. -#C- -#C- This program is distributed in the hope that it will be useful, -#C- but WITHOUT ANY WARRANTY; without even the implied warranty of -#C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#C- GNU General Public License for more details. - - -SHELL = /bin/bash - -PACKAGE_NAME = djvulibre -PACKAGE_VERSION = 3.5.25 - -srcdir = . -top_srcdir = ../.. -top_builddir = ../.. 
- -prefix = /usr/local -exec_prefix = ${prefix} -bindir = ${exec_prefix}/bin -datarootdir = ${prefix}/share -datadir = ${datarootdir} -libdir = ${exec_prefix}/lib -mandir = ${datarootdir}/man - -CC = gcc -CXX = g++ -RM = /bin/rm -f -AR = /usr/bin/ar -RANLIB = ranlib -LN_S = ln -s -TOUCH = /usr/bin/touch -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} -INSTALL_DATA = ${INSTALL} -m 644 -LIBTOOL = $(SHELL) $(top_builddir)/libtool - -DEFS = -DHAVE_CONFIG_H -OPTS = -DNDEBUG -Wall -O3 -Wno-non-virtual-dtor -CXXRPOFLAGS = -THREAD_LIBS = -lpthread -THREAD_CFLAGS = -pthread -DTHREADMODEL=POSIXTHREADS - -INCS = -I${top_builddir} -I${top_srcdir} -I${srcdir} -FLAGS = ${DEFS} ${INCS} ${OPTS} ${THREAD_CFLAGS} -LIBS= ${THREAD_LIBS} -lm -CFLAGS = ${FLAGS} -CXXFLAGS = ${CXXRPOFLAGS} ${FLAGS} - - -OBJS = classify.o cuts.o frames.o patterns.o -JB2CMP_A = libjb2cmp.a - -all: ${JB2CMP_A} - -#${JB2CMP_A} : libjb2cmp-rpo -#libjb2cmp-rpo : ${OBJS} -# @echo "[ performing library closure ... ]" -# @echo "int main() { return 0; }" > $@.cpp -# ${CXX} ${CXXFLAGS} -c $@.cpp -# ${CXX} ${CXXFLAGS} -o $@ $@.o ${OBJS} ${LIBS} -install: - -depend: FORCE - ${CXX} -MM ${CXXFLAGS} ${srcdir}/*.cpp > Makefile.dep - -clean: FORCE - -${RM} 2>/dev/null ${JB2CMP} - -${RM} 2>/dev/null *.o *.a *.so *.stamp - -distclean: clean - -${RM} -r 2>/dev/null Makefile *.rpo ii_files - -# Link - -${JB2CMP_A}: ${OBJS} - -${RM} 2>/dev/null ${JB2CMP_A} - ${AR} cr ${JB2CMP_A} ${OBJS} - ${RANLIB} ${JB2CMP_A} - -# Rules - -FORCE: -.PHONY: FORCE -.SUFFIXES: .c .cpp .o - -.c.o: - ${CC} ${CFLAGS} -c $< - -.cpp.o: - ${CXX} ${CXXFLAGS} -c $< - -.c.lo: - ${LIBTOOL} --mode=compile ${CC} ${CFLAGS} -c $< - -.cpp.lo: - ${LIBTOOL} --mode=compile ${CXX} ${CXXFLAGS} -c $< - -# Dependencies - -Makefile.dep: ${srcdir}/Makefile.dep - cp ${srcdir}/Makefile.dep $@ - -include Makefile.dep diff --git a/jb2cmp/Makefile.dep b/jb2cmp/Makefile.dep deleted file mode 100644 index ac43ded..0000000 --- a/jb2cmp/Makefile.dep +++ /dev/null @@ 
-1,4 +0,0 @@ -classify.o: classify.cpp mdjvucfg.h minidjvu.h patterns.h classify.h -cuts.o: cuts.cpp mdjvucfg.h minidjvu.h patterns.h classify.h -frames.o: frames.cpp mdjvucfg.h minidjvu.h patterns.h classify.h -patterns.o: patterns.cpp mdjvucfg.h minidjvu.h patterns.h classify.h diff --git a/jb2cmp/Makefile.in b/jb2cmp/Makefile.in deleted file mode 100644 index 4825c76..0000000 --- a/jb2cmp/Makefile.in +++ /dev/null @@ -1,109 +0,0 @@ -#C- This program is free software; you can redistribute it and/or -#C- modify it under the terms of the GNU General Public License, -#C- either Version 2 of the License or (at your option) any later -#C- version. The license should have accompanied the program -#C- or you may obtain a copy of the license from the Free Software -#C- Foundation at http://www.fsf.org. -#C- -#C- This program is distributed in the hope that it will be useful, -#C- but WITHOUT ANY WARRANTY; without even the implied warranty of -#C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#C- GNU General Public License for more details. - -@SET_MAKE@ -SHELL = @SHELL@ -VPATH = @srcdir@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = ../.. 
- -prefix = @prefix@ -exec_prefix = @exec_prefix@ -bindir = @bindir@ -datarootdir = @datarootdir@ -datadir = @datadir@ -libdir = @libdir@ -mandir = @mandir@ - -CC = @CC@ -CXX = @CXX@ -RM = @RM@ -AR = @AR@ -RANLIB = @RANLIB@ -LN_S = @LN_S@ -TOUCH = @TOUCH@ -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_DATA = @INSTALL_DATA@ -LIBTOOL = @LIBTOOL@ - -DEFS = @DEFS@ -OPTS = @OPTS@ -CXXRPOFLAGS = @CXXRPOFLAGS@ -THREAD_LIBS = @THREAD_LIBS@ -THREAD_CFLAGS = @THREAD_CFLAGS@ - -INCS = -I${top_builddir} -I${top_srcdir} -I${srcdir} -FLAGS = ${DEFS} ${INCS} ${OPTS} ${THREAD_CFLAGS} -LIBS= @LDFLAGS@ ${THREAD_LIBS} @LIBS@ -CFLAGS = @CPPFLAGS@ @CFLAGS@ ${FLAGS} -CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ ${CXXRPOFLAGS} ${FLAGS} - - -OBJS = classify.o cuts.o frames.o patterns.o -JB2CMP_A = libjb2cmp.a - -all: ${JB2CMP_A} - -@RPO_YES@${JB2CMP_A} : libjb2cmp-rpo -@RPO_YES@libjb2cmp-rpo : ${OBJS} -@RPO_YES@ @echo "[ performing library closure ... ]" -@RPO_YES@ @echo "int main() { return 0; }" > $@.cpp -@RPO_YES@ ${CXX} ${CXXFLAGS} -c $@.cpp -@RPO_YES@ ${CXX} ${CXXFLAGS} -o $@ $@.o ${OBJS} ${LIBS} -install: - -depend: FORCE - ${CXX} -MM ${CXXFLAGS} ${srcdir}/*.cpp > Makefile.dep - -clean: FORCE - -${RM} 2>/dev/null ${JB2CMP} - -${RM} 2>/dev/null *.o *.a *.so *.stamp - -distclean: clean - -${RM} -r 2>/dev/null Makefile *.rpo ii_files - -# Link - -${JB2CMP_A}: ${OBJS} - -${RM} 2>/dev/null ${JB2CMP_A} - ${AR} cr ${JB2CMP_A} ${OBJS} - ${RANLIB} ${JB2CMP_A} - -# Rules - -FORCE: -.PHONY: FORCE -.SUFFIXES: .c .cpp .o - -.c.o: - ${CC} ${CFLAGS} -c $< - -.cpp.o: - ${CXX} ${CXXFLAGS} -c $< - -.c.lo: - ${LIBTOOL} --mode=compile ${CC} ${CFLAGS} -c $< - -.cpp.lo: - ${LIBTOOL} --mode=compile ${CXX} ${CXXFLAGS} -c $< - -# Dependencies - -Makefile.dep: ${srcdir}/Makefile.dep - cp ${srcdir}/Makefile.dep $@ - -include Makefile.dep diff --git a/jb2cmp/README b/jb2cmp/README deleted file mode 100644 index 4ea2709..0000000 --- a/jb2cmp/README +++ /dev/null @@ -1,3 +0,0 @@ -Files in this directory 
come from the minidjvu project -. -Thanks to Ilya Mezhirov. diff --git a/jb2cmp/classify.cpp b/jb2cmp/classify.cpp deleted file mode 100644 index fb0ae2a..0000000 --- a/jb2cmp/classify.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * classify.c - classifying patterns - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. - * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . 
- * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - - -#include "mdjvucfg.h" -#include "minidjvu.h" -#include - - -/* Stuff for not using malloc in C++ - * (made by Leon Bottou; has no use in minidjvu, - * but left here for potential DjVuLibre compatibility) - */ -#ifdef __cplusplus -# define MALLOC(Type) new Type -# define FREE(p) delete p -# define MALLOCV(Type,n) new Type[n] -# define FREEV(p) delete [] p -#else -# define MALLOC(Type) ((Type*)malloc(sizeof(Type))) -# define FREE(p) do{if(p)free(p);}while(0) -# define MALLOCV(Type,n) ((Type*)malloc(sizeof(Type)*(n))) -# define FREEV(p) do{if(p)free(p);}while(0) -#endif - - -/* Classes are single-linked lists with an additional pointer to the last node. - * This is an class item. 
- */ -typedef struct ClassNode -{ - mdjvu_pattern_t ptr; - struct ClassNode *next; /* NULL if this node is the last one */ - struct ClassNode *global_next; /* next among all nodes to classify */ - int32 tag; /* filled before the final dumping */ -} ClassNode; - -/* Classes themselves are composed in double-linked list. */ -typedef struct Class -{ - ClassNode *first, *last; - struct Class *prev_class; - struct Class *next_class; -} Class; - - -typedef struct Classification -{ - Class *first_class; - ClassNode *first_node, *last_node; -} Classification; - -/* Creates an empty class and links it to the list of classes. */ -static Class *new_class(Classification *cl) -{ - Class *c = MALLOC(Class); - c->first = c->last = NULL; - c->prev_class = NULL; - c->next_class = cl->first_class; - if (cl->first_class) cl->first_class->prev_class = c; - cl->first_class = c; - return c; -} - -/* Unlinks a class and deletes it. Its nodes are not deleted. */ -static void delete_class(Classification *cl, Class *c) -{ - Class *prev = c->prev_class, *next = c->next_class; - - if (prev) - prev->next_class = next; - else - cl->first_class = next; - - if (next) - next->prev_class = prev; - - FREE(c); -} - -/* Creates a new node and adds it to the given class. */ -static ClassNode *new_node(Classification *cl, Class *c, mdjvu_pattern_t ptr) -{ - ClassNode *n = MALLOC(ClassNode); - n->ptr = ptr; - n->next = c->first; - c->first = n; - if (!c->last) c->last = n; - n->global_next = NULL; - - if (cl->last_node) - cl->last_node->global_next = n; - else - cl->first_node = n; - - cl->last_node = n; - return n; -} - -/* Merge two classes and delete one of them. */ -static Class *merge(Classification *cl, Class *c1, Class *c2) -{ - if (!c1->first) - { - delete_class(cl, c1); - return c2; - } - if (c2->first) - { - c1->last->next = c2->first; - c1->last = c2->last; - } - delete_class(cl, c2); - return c1; -} - -/* Puts a tag on each node corresponding to its class. 
*/ -static unsigned put_tags(Classification *cl) -{ - int32 tag = 1; - Class *c = cl->first_class; - while (c) - { - ClassNode *n = c->first; - while (n) - { - n->tag = tag; - n = n->next; - } - c = c->next_class; - tag++; - } - return tag - 1; -} - -/* Deletes all classes; nodes are untouched. */ -static void delete_all_classes(Classification *cl) -{ - Class *c = cl->first_class; - while (c) - { - Class *t = c; - c = c->next_class; - FREE(t); - } -} - -/* Compares p with nodes from c until a meaningful result. */ -static int compare_to_class(mdjvu_pattern_t p, Class *c, int32 dpi, - mdjvu_matcher_options_t options) -{ - int r = 0; - ClassNode *n = c->first; - while(n) - { - r = mdjvu_match_patterns(p, n->ptr, dpi, options); - if (r) break; - n = n->next; - } - return r; -} - -static void classify(Classification *cl, mdjvu_pattern_t p, - int32 dpi, mdjvu_matcher_options_t options) -{ - Class *class_of_this = NULL; - Class *c, *next_c = NULL; - for (c = cl->first_class; c; c = next_c) - { - next_c = c->next_class; /* That's because c may be deleted in merging */ - - if (class_of_this == c) continue; - if (compare_to_class(p, c, dpi, options) != 1) continue; - - if (class_of_this) - class_of_this = merge(cl, class_of_this, c); - else - class_of_this = c; - } - if (!class_of_this) class_of_this = new_class(cl); - new_node(cl, class_of_this, p); -} - -MDJVU_IMPLEMENT int32 mdjvu_classify_patterns - (mdjvu_pattern_t *b, int32 *r, int32 n, int32 dpi, - mdjvu_matcher_options_t options) -{ - int32 i, max_tag; - ClassNode *node; - Classification cl; - - cl.first_class = NULL; - cl.first_node = cl.last_node = NULL; - - for (i = 0; i < n; i++) if (b[i]) classify(&cl, b[i], dpi, options); - - max_tag = put_tags(&cl); - delete_all_classes(&cl); - - i = 0; - node = cl.first_node; - while (node) - { - ClassNode *t; - while (!b[i]) r[i++] = 0; - r[i++] = node->tag; - t = node; - node = node->global_next; - FREE(t); - } - if (i < n) while (i < n) r[i++] = 0; - return max_tag; -} - 
-#ifndef NO_MINIDJVU - -MDJVU_IMPLEMENT int32 mdjvu_classify_bitmaps_in_image - (mdjvu_image_t image, int32 *result, mdjvu_matcher_options_t options) -{ - int32 i, n = mdjvu_image_get_bitmap_count(image); - int32 dpi = mdjvu_image_get_resolution(image); - mdjvu_pattern_t *patterns = MALLOCV(mdjvu_pattern_t, n); - int32 max_tag; - - for (i = 0; i < n; i++) - { - mdjvu_bitmap_t bitmap = mdjvu_image_get_bitmap(image, i); - if (mdjvu_image_get_no_substitution_flag(image, bitmap)) - patterns[i] = NULL; - else - patterns[i] = mdjvu_pattern_create(bitmap); - } - - max_tag = mdjvu_classify_patterns(patterns, result, n, dpi, options); - - for (i = 0; i < n; i++) - if (patterns[i]) mdjvu_pattern_destroy(patterns[i]); - FREEV(patterns); - - return max_tag; -} - -#endif /* NO_MINIDJVU */ diff --git a/jb2cmp/classify.h b/jb2cmp/classify.h deleted file mode 100644 index abf6986..0000000 --- a/jb2cmp/classify.h +++ /dev/null @@ -1,83 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * classify.h - classifying patterns - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. - * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. 
This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - - -#ifndef MDJVU_CLASSIFY_H -#define MDJVU_CLASSIFY_H - -/* Classifies a set of patterns. - * result - array of tags ranging from 1 to return value, - * and 0 for those cells which were NULL (yes, NULLs are permitted). - * Every tag has at least one pattern to which it's attached. - * Equally tagged images are classified equivalent. - */ - -MDJVU_FUNCTION int32 mdjvu_classify_patterns - (mdjvu_pattern_t *, int32 *result, int32 n, int32 dpi, - mdjvu_matcher_options_t); - -#ifndef NO_MINIDJVU - -/* Special tag 0 is reserved for bitmaps marked "no-substitution". 
*/ -MDJVU_FUNCTION int32 mdjvu_classify_bitmaps_in_image - (mdjvu_image_t, int32 *result, mdjvu_matcher_options_t); - -#endif /* NO_MINIDJVU */ - -#endif /* MDJVU_CLASSIFY_H */ diff --git a/jb2cmp/cuts.cpp b/jb2cmp/cuts.cpp deleted file mode 100644 index 90aa020..0000000 --- a/jb2cmp/cuts.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * cuts.c - finding "cuts signature" consisting of consecutive cut positions - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. - * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. 
The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - - -/* We cut an image horizontally in such a way - * that below and above the cut the blackness is roughly the same. - * Than cutting each of the two pieces vertically in the same fashion. - * Then horizontally, and so on until SIGNATURE_SIZE - 1 cuts. - * The position of each cut is normalized into 0..255 and put into signature. 
- */ - -#include "mdjvucfg.h" -#include "minidjvu.h" -#include - - -/* Stuff for not using malloc in C++ - * (made by Leon Bottou; has no use in minidjvu, - * but left here for potential DjVuLibre compatibility) - */ -#ifdef __cplusplus -# define MALLOC(Type) new Type -# define FREE(p) delete p -# define MALLOCV(Type,n) new Type[n] -# define FREEV(p) delete [] p -#else -# define MALLOC(Type) ((Type*)malloc(sizeof(Type))) -# define FREE(p) do{if(p)free(p);}while(0) -# define MALLOCV(Type,n) ((Type*)malloc(sizeof(Type)*(n))) -# define FREEV(p) do{if(p)free(p);}while(0) -#endif - - -typedef unsigned char byte; - -static int32 sum_column_gray(byte **pixels, int32 x, int32 y1, int32 y2) -{ - int sum = 0, y; - for (y = y1; y <= y2; y++) sum += pixels[y][x]; - return sum; -} - -static int32 sum_row_gray(byte *row, int32 x1, int32 x2) -{ - int sum = 0, x, n = x2 - x1; - byte *p = row + x1; - for (x = 0; x <= n; x++) sum += p[x]; - return sum; -} - -static int32 sum_column_black_and_white(byte **pixels, int32 x, int32 y1, int32 y2) -{ - int sum = 0, y; - for (y = y1; y <= y2; y++) if (pixels[y][x]) sum++; - return sum; -} - -static int32 sum_row_black_and_white(byte *row, int32 x1, int32 x2) -{ - int sum = 0, x, n = x2 - x1; - byte *p = row + x1; - for (x = 0; x <= n; x++) if (p[x]) sum++; - return sum; -} - -static void make_vcut(int32 a, int32 l, int32 w, int32 h, byte **pixels, - byte *sig, int32 k, - int32 s_row(byte *, int32, int32), - int32 s_col(byte **, int32, int32, int32), - int32 size); - -static void make_hcut(int32 a, int32 l, int32 w, int32 h, - byte **pixels, byte *sig, int32 k, - int32 s_row(byte *, int32, int32), - int32 s_col(byte **, int32, int32, int32), - int32 size) -{ - int32 cut = 0; /* how many rows are in the top part */ - int32 up_weight = 0; - - if (k >= size) return; - - if (a) - { - int32 last_row_weight = 0; - - assert(w && h); - - while ((up_weight << 1) < a) - { - last_row_weight = s_row(pixels[cut], l, l + w - 1); - up_weight += 
last_row_weight; - cut++; - } - cut--; - up_weight -= last_row_weight; - sig[k] = (byte) ((256 * - (cut * w + w * ((a >> 1) - up_weight) / last_row_weight)) - / (w * h)); - if (a - (up_weight << 1) > last_row_weight) - { - cut++; - up_weight += last_row_weight; - } - } - else - { - cut = h / 2; - sig[k] = 128; - } - - make_vcut(up_weight, l, w, cut, pixels, sig, k << 1, s_row, s_col, size); - make_vcut(a - up_weight, l, w, h - cut, pixels + cut, sig, (k << 1) | 1, s_row, s_col, size); -} - -static void make_vcut(int32 a, int32 l, int32 w, int32 h, - byte **pixels, byte *sig, int32 k, - int32 s_row(byte *, int32, int32), - int32 s_col(byte **, int32, int32, int32), - int32 size) -{ - int32 cut = 0; /* how many columns are in the left part */ - int32 left_weight = 0; - - if (k >= size) return; - - if (a) - { - int32 last_col_weight = 0; - - assert(w && h); - - while ((left_weight << 1) < a) - { - last_col_weight = s_col(pixels, l + cut, 0, h-1); - left_weight += last_col_weight; - cut++; - } - cut--; - left_weight -= last_col_weight; - sig[k] = (byte) ((256 * - (cut * h + h * ((a >> 1) - left_weight) / last_col_weight)) - / (w * h)); - if (a - (left_weight << 1) > last_col_weight) - { - cut++; left_weight += last_col_weight; - } - } - else - { - cut = w / 2; - sig[k] = 128; - } - - make_hcut(left_weight, l, cut, h, pixels, sig, k << 1, s_row, s_col, size); - make_hcut(a - left_weight, l + cut, w - cut, h, pixels, sig, (k << 1) | 1, s_row, s_col, size); -} - -static void get_signature(int32 width, int32 height, byte **pixels, byte *sig, - int32 s_row(byte *, int32, int32), - int32 s_col(byte **, int32, int32, int32), - int32 size) -{ - int32 area = 0, i; - for (i = 0; i < height; i++) - { - area += s_row(pixels[i], 0, width - 1); - } - /* FIXME: sig[0] is wasted */ - make_hcut(area, 0, width, height, pixels, sig, 1, s_row, s_col, size); -} - -MDJVU_IMPLEMENT void mdjvu_get_gray_signature(byte **data, int32 w, int32 h, - byte *result, int32 size) -{ - get_signature(w, 
h, data, result, sum_row_gray, sum_column_gray, size); -} - -MDJVU_IMPLEMENT void mdjvu_get_black_and_white_signature - (byte **data, int32 w, int32 h, - byte *result, int32 size) -{ - get_signature(w, h, data, result, sum_row_black_and_white, sum_column_black_and_white, size); -} diff --git a/jb2cmp/frames.cpp b/jb2cmp/frames.cpp deleted file mode 100644 index 0cf6d1d..0000000 --- a/jb2cmp/frames.cpp +++ /dev/null @@ -1,417 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * frames.c - extracting frameworks and calculating "importance rating" - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. 
- * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - -/* Frameworks are funny things... - * The algorithm is to be commented yet. - * Here's a picture illustrating what is a frame - * (view with monospace font): - * - * Original letter: Its framework: - * - .....@@@@@@@@........ ..................... - ...@@@@@@@@@@@@...... ......@@@@@.......... - ..@@@@@@@@@@@@@@..... .....@@...@@@........ 
- ..@@@@@...@@@@@@@.... ....@@......@@....... - ..@@@@.....@@@@@@.... ....@........@....... - .@@@@@.....@@@@@@.... ....@........@....... - .@@@@@.....@@@@@@.... ....@........@....... - ..@@@@.....@@@@@@.... ....@........@....... - ..........@@@@@@@.... .............@....... - .......@@@@@@@@@@.... .............@....... - .....@@@@@@@@@@@@.... ........@@@@@@....... - ...@@@@@@@@@@@@@@.... ......@@@....@....... - ..@@@@@@@..@@@@@@.... .....@@......@....... - .@@@@@@....@@@@@@.... ...@@@.......@....... - .@@@@@.....@@@@@@.... ...@.........@....... - @@@@@......@@@@@@.... ..@@.........@....... - @@@@@......@@@@@@.... ..@..........@....... - @@@@@.....@@@@@@@.... ..@..........@....... - @@@@@@....@@@@@@@.@@@ ..@@.........@....... - .@@@@@@@@@@@@@@@@@@@@ ...@@.....@@@@@...... - .@@@@@@@@@@@@@@@@@@@@ ....@@..@@@...@@@@@.. - ..@@@@@@@@@.@@@@@@@@. .....@@@@............ - ....@@@@@....@@@@@... ..................... - - * A letter is converted into grayshades, - * and a frame is the set of its purely black pixels after the transformation. - * In the grayshade version of a letter, - * all pixels that were white remain absolutely white, - * the frame is black and the blackness falls down from it to the border. - */ - -/* Offtopic: I wonder if this thing could help OCRing because frameworks - * perfectly retain readability while becoming essentially 1-dimensional. 
- */ - -#include "mdjvucfg.h" -#include "minidjvu.h" -#include -#include -#include -#include - -/* Stuff for not using malloc in C++ - * (made by Leon Bottou; has no use in minidjvu, - * but left here for potential DjVuLibre compatibility) - */ -#ifdef __cplusplus -# define MALLOC(Type) new Type -# define FREE(p) delete p -# define MALLOCV(Type,n) new Type[n] -# define FREEV(p) delete [] p -#else -# define MALLOC(Type) ((Type*)malloc(sizeof(Type))) -# define FREE(p) do{if(p)free(p);}while(0) -# define MALLOCV(Type,n) ((Type*)malloc(sizeof(Type)*(n))) -# define FREEV(p) do{if(p)free(p);}while(0) -#endif - - -/* This determines the gray level of the border (ratio of black). - * Setting it to 1 will effectively eliminate grayshading. - */ -#define BORDER_FALLOFF .7 /* this is the main constant in all the matcher... */ - -typedef unsigned char byte; - -static int donut_connectivity_test(byte *upper, byte *row, byte *lower)/*{{{*/ -{ - /*(on the pictures below 0 is white, 1 is black or gray) - * - * 01. - * 1 . -> 1 - * ... - * - * .0. - * 1 1 -> 1 - * .0. - * - * all others -> 0 - */ - - int sum, l, u, d, r; - - sum = (u = *upper ? 1 : 0) + (d = *lower ? 1 : 0) + - (l = row[-1] ? 1 : 0) + (r = row[1] ? 1 : 0); - - switch(sum) - { - case 3:/*{{{*/ - { - int x = 6 - (u + (l << 1) + d + (d << 1)); - switch(x) - { - case 0: /* l */ - return upper[-1] && lower[-1] ? 0 : 1; - case 1: /* d */ - return lower[-1] && lower[1] ? 0 : 1; - case 2: /* r */ - return upper[1] && lower[1] ? 0 : 1; - case 3: /* u */ - return upper[-1] && upper[1] ? 0 : 1; - default: assert(0); return 0; - } - } - break;/*}}}*/ - case 2:/*{{{*/ - { - int s = l + r; - if (s & 1) - { - /* A1. - * 1 0 - should be !A (2x2 square extermination) - * .0. - */ - if (l) - { - if (u) - return upper[-1] ? 0 : 1; - else - return lower[-1] ? 0 : 1; - } - else /* r */ - { - if (u) - return upper[1] ? 0 : 1; - else - return lower[1] ? 0 : 1; - } - } - else - { - /* .0. 
- * 1 1 - surely should be 1 to preserve connection - * .0. - */ - return 1; - } - } - break;/*}}}*/ - case 0: case 4: - return 1; - case 1: - return 0; - default: assert(0); return 0; - } - assert(0); return 0; -}/*}}}*/ -static byte donut_transform_pixel(byte *upper, byte *row, byte *lower)/*{{{*/ -{ - /* (center pixel should be gray in order for this to work) - * (on the pictures below 0 is white, 1 is black or gray) - * - * 01. - * 1 . -> center will become 1 - * ... - * - * .0. - * 1 1 -> center will become 1 - * .0. - * - * 00. - * 1 0 -> center will become 1 - * .0. - * - * 1.. - * 1 0 -> center will become 0 - * 1.. - * - * 11. - * 1 0 -> center will become 0 - * .0. - * - * .A. - * A A -> center will become 1 - * .A. - */ - - int sum, l, u, d, r; - if (!*row) return 0; - - sum = (u = *upper ? 1 : 0) + (d = *lower ? 1 : 0) + - (l = row[-1] ? 1 : 0) + (r = row[1] ? 1 : 0); - - switch(sum) - { - case 1: case 3:/*{{{*/ - { - int x = u + (l << 1) + d + (d << 1); - if (sum == 3) x = (6 - x) ^ 2; - switch(x) - { - case 0: /* r */ - return upper[1] && lower[1] ? 0 : 1; - case 1: /* u */ - return upper[-1] && upper[1] ? 0 : 1; - case 2: /* l */ - return upper[-1] && lower[-1] ? 0 : 1; - case 3: /* d */ - return lower[-1] && lower[1] ? 0 : 1; - default: assert(0); return 0; - } - } - break;/*}}}*/ - case 2:/*{{{*/ - { - int s = l + r; - if (s & 1) - { - /* A1. - * 1 0 - should be !A (2x2 square extermination) - * .0. - */ - if (l) - { - if (u) - return upper[-1] ? 0 : 1; - else - return lower[-1] ? 0 : 1; - } - else /* r */ - { - if (u) - return upper[1] ? 0 : 1; - else - return lower[1] ? 0 : 1; - } - } - else - { - /* .0. - * 1 1 - surely should be 1 to preserve connection - * .0. 
- */ - return 1; - } - } - break;/*}}}*/ - case 0: case 4: - return 1; /* lone pixels are NOT omitted */ - default: assert(0); return 0; - } - assert(0); return 0; -}/*}}}*/ - -/* `pixels' should have a margin of 1 pixel at each side - * returns true if the image was changed - */ -static int flay(byte **pixels, int w, int h, int rank, int **ranks) -{ - int i, j, result = 0; - - byte *buf = MALLOCV(byte, w * h); - - assert(pixels); - assert(w); - assert(h); - - for (i = 0; i < h; i++) for (j = 0; j < w; j++) - { - buf[w * i + j] = - donut_transform_pixel(pixels[i-1] + j, pixels[i] + j, pixels[i+1] + j); - } - - for (i = 0; i < h; i++) - { - byte *up = pixels[i-1], *row = pixels[i], *dn = pixels[i+1]; - byte *buf_row = buf + w * i; - int *rank_row = NULL; - if (ranks) rank_row = ranks[i]; - for (j = 0; j < w; j++) - { - if (row[j] && !buf_row[j]) - { - if (!donut_connectivity_test(up + j, row + j, dn + j)) - { - row[j] = buf_row[j]; - if (rank) rank_row[j] = rank; - result = 1; - } - } - else - row[j] = buf_row[j]; - } - } - - FREEV(buf); - return result; -} - -/* TODO: use less temporary buffers and silly copyings */ -MDJVU_IMPLEMENT void mdjvu_soften_pattern(byte **result, byte **pixels, int32 w, int32 h)/*{{{*/ -{ - byte *r = MALLOCV(byte, (w + 2) * (h + 2)); - byte **pointers = MALLOCV(byte *, h + 2); - int *ranks_buf = MALLOCV(int, w * h); - int **ranks = MALLOCV(int *, h); - - int i, j, passes = 1; - double level = 1, falloff; - byte *colors; - - memset(r, 0, (w + 2) * (h + 2)); - memset(ranks_buf, 0, w * h * sizeof(int)); - - for (i = 0; i < h + 2; i++) - pointers[i] = r + (w + 2) * i + 1; - - for (i = 0; i < h; i++) - memcpy(pointers[i+1], pixels[i], w); - - for (i = 0; i < h; i++) - ranks[i] = ranks_buf + w * i; - - while(flay(pointers + 1, w, h, passes, ranks)) passes++; - - colors = MALLOCV(byte, passes + 1); - - falloff = pow(BORDER_FALLOFF, 1./passes); - - for (i = 0; i < passes; i++) - { - colors[i] = (byte) (level * 255); - level *= falloff; - } - /* 
TODO: test the next line */ - /* colors[passes - 1] = 50; pay less attention to border pixels */ - colors[passes] = 0; - - pointers++; - for (i = 0; i < h; i++) - { - for (j = 0; j < w; j++) - { - if (pointers[i][j]) - { - result[i][j] = 255; - } - else - { - result[i][j] = colors[passes - ranks[i][j]]; - } - } - } - pointers--; - - FREEV(colors); - FREEV(ranks); - FREEV(ranks_buf); - FREEV(r); - FREEV(pointers); -}/*}}}*/ diff --git a/jb2cmp/mdjvucfg.h b/jb2cmp/mdjvucfg.h deleted file mode 100644 index e69de29..0000000 diff --git a/jb2cmp/minidjvu.h b/jb2cmp/minidjvu.h deleted file mode 100644 index 6664daf..0000000 --- a/jb2cmp/minidjvu.h +++ /dev/null @@ -1,75 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * no_mdjvu.h - stuff for compiling the pattern matcher outside of minidjvu - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. 
All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. - * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- * +------------------------------------------------------------------ - */ - - -/* - * to compile the pattern matcher without the rest of minidjvu, - * do this: - * - * mv no_mdjvu.h minidjvu.h - * touch mdjvucfg.h - * g++ -c *.cpp - */ - -#define NO_MINIDJVU -#define MDJVU_FUNCTION -#define MDJVU_IMPLEMENT -typedef int int32; -#include "patterns.h" -#include "classify.h" /* to compile it with the classificator */ diff --git a/jb2cmp/patterns.cpp b/jb2cmp/patterns.cpp deleted file mode 100644 index b31cf4e..0000000 --- a/jb2cmp/patterns.cpp +++ /dev/null @@ -1,626 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * patterns.c - pattern matching algorithm - * - * Copyright (C) 2005 Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. 
- * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - -/* This is `patterns.c', the unit that handles pattern matching. - * Its task is only to compare pairs of images, not to classify a set of them. - * And this has absolutely nothing to do with choosing a cross-coding prototype. 
- */ - -#include "mdjvucfg.h" -#include "minidjvu.h" -#include -#include -#include - - -/* Stuff for not using malloc in C++ - * (made by Leon Bottou; has no use in minidjvu, - * but left here for potential DjVuLibre compatibility) - */ -#ifdef __cplusplus -# define MALLOC(Type) new Type -# define FREE(p) delete p -# define MALLOCV(Type,n) new Type[n] -# define FREEV(p) delete [] p -#else -# define MALLOC(Type) ((Type*)malloc(sizeof(Type))) -# define FREE(p) do{if(p)free(p);}while(0) -# define MALLOCV(Type,n) ((Type*)malloc(sizeof(Type)*(n))) -# define FREEV(p) do{if(p)free(p);}while(0) -#endif - - -#define SIGNATURE_SIZE 32 - -/* Mass center coordinates are stored in (1/MASS_CENTER_QUANT) pixels. - * This leads to more precise alignment then using whole pixels. - */ -#define MASS_CENTER_QUANT 8 - - -/* These are hand-tweaked parameters of this classifier. */ - -typedef struct -{ - double pithdiff_threshold; - double softdiff_threshold; - double shiftdiff1_threshold; - double shiftdiff2_threshold; - double shiftdiff3_threshold; -} Options; - -static const double pithdiff_veto_threshold = 23; -static const double softdiff_veto_threshold = 100; /* that means off */ -static const double shiftdiff1_veto_threshold = 1000; -static const double shiftdiff2_veto_threshold = 1500; -static const double shiftdiff3_veto_threshold = 2000; - -static const double size_difference_threshold = 10; -static const double mass_difference_threshold = 15; - -static const double shiftdiff1_falloff = .9; -static const double shiftdiff2_falloff = 1; -static const double shiftdiff3_falloff = 1.15; - -static void interpolate(Options *opt, const double *v1, const double *v2, - int l, int r, int x) -{ - double w1 = ((double)(r - x)) / (r - l); /* weights */ - double w2 = 1 - w1; - opt->pithdiff_threshold = v1[0] * w1 + v2[0] * w2; - opt->softdiff_threshold = v1[1] * w1 + v2[1] * w2; - opt->shiftdiff1_threshold = v1[2] * w1 + v2[2] * w2; - opt->shiftdiff2_threshold = v1[3] * w1 + v2[3] * w2; - 
opt->shiftdiff3_threshold = v1[4] * w1 + v2[4] * w2; -} - - -/* Sets `aggression' for pattern matching. - * Lower values are safer, bigger values produce smaller files. - */ - -MDJVU_IMPLEMENT void mdjvu_set_aggression(mdjvu_matcher_options_t opt, int level) -{ - const double set200[5] = {7, 15, 60, 80, 170}; - const double set150[5] = {5, 13, 50, 70, 160}; - const double set0[5] = {0, 0, 0, 0, 0}; - - if (level < 0) level = 0; - - if (level > 150) - interpolate((Options *) opt, set150, set200, 150, 200, level); - else - interpolate((Options *) opt, set0, set150, 0, 150, level); -} - -/* ========================================================================== */ - -MDJVU_IMPLEMENT mdjvu_matcher_options_t mdjvu_matcher_options_create(void) -{ - mdjvu_matcher_options_t options = (mdjvu_matcher_options_t) MALLOC(Options); - mdjvu_set_aggression(options, 100); - return options; -} - -MDJVU_IMPLEMENT void mdjvu_matcher_options_destroy(mdjvu_matcher_options_t opt) -{ - FREE((Options *) opt); -} - - -/* FIXME: maxint is maxint32 */ -static const int32 maxint = ~(1 << (sizeof(int32) * 8 - 1)); -typedef unsigned char byte; - -typedef struct ComparableImageData -{ - byte **pixels; /* 0 - purely white, 255 - purely black (inverse to PGM!) */ - int32 width, height, mass; - int32 mass_center_x, mass_center_y; - byte signature[SIGNATURE_SIZE]; /* for shiftdiff 1 and 3 tests */ - byte signature2[SIGNATURE_SIZE]; /* for shiftdiff 2 test */ -} Image; - - - -/* Each image pair undergoes simple tests (dimensions and mass) - * and at most five more advanced tests. - * Each test may end up with three outcomes: veto (-1), doubt (0) and match(1). - * Images are equivalent if and only if - * there was no `veto' - * and there was at least one `match'. - */ - - -/* We check whether images' dimensions are different - * no more than by size_difference_threshold percent. - * Return value is usual: veto (-1) or doubt (0). - * Mass checking was introduced by Leon Bottou. 
- */ - -static int simple_tests(Image *i1, Image *i2) -{ - int32 w1 = i1->width, h1 = i1->height, m1 = i1->mass; - int32 w2 = i2->width, h2 = i2->height, m2 = i2->mass; - - if (100.* w1 > (100.+ size_difference_threshold) * w2) return -1; - if (100.* w2 > (100.+ size_difference_threshold) * w1) return -1; - if (100.* h1 > (100.+ size_difference_threshold) * h2) return -1; - if (100.* h2 > (100.+ size_difference_threshold) * h1) return -1; - if (100.* m1 > (100.+ mass_difference_threshold) * m2) return -1; - if (100.* m2 > (100.+ mass_difference_threshold) * m1) return -1; - - return 0; -} - - -#define USE_PITHDIFF 1 -#define USE_SOFTDIFF 1 -#define USE_SHIFTDIFF_1 1 -#define USE_SHIFTDIFF_2 1 -#define USE_SHIFTDIFF_3 1 - - -/* Computing distance by comparing pixels {{{ */ - -#if USE_PITHDIFF || USE_SOFTDIFF - -/* This function compares two images pixel by pixel. - * The exact way to compare pixels is defined by two functions, - * compare_row and compare_with_white. - * Both functions take pointers to byte rows and their length. - * - * Now images are aligned by mass centers. - * Code needs some clarification, yes... - */ -static int32 distance_by_pixeldiff_functions_by_shift(Image *i1, Image *i2, - int32 (*compare_row)(byte *, byte *, int32), - int32 (*compare_with_white)(byte *, int32), int32 ceiling, - int32 shift_x, int32 shift_y) /* of i1's coordinate system with respect to i2 */ -{ - int32 w1 = i1->width, w2 = i2->width, h1 = i1->height, h2 = i2->height; - int32 min_y = shift_y < 0 ? shift_y : 0; - int32 right1 = shift_x + w1; - int32 max_y_plus_1 = h2 > shift_y + h1 ? h2 : shift_y + h1; - int32 i; - int32 min_overlap_x = shift_x > 0 ? shift_x : 0; - int32 max_overlap_x_plus_1 = w2 < right1 ? 
w2 : right1; - int32 min_overlap_x_for_i1 = min_overlap_x - shift_x; - int32 max_overlap_x_plus_1_for_i1 = max_overlap_x_plus_1 - shift_x; - int32 overlap_length = max_overlap_x_plus_1 - min_overlap_x; - int32 score = 0; - - if (overlap_length <= 0) return maxint; - - for (i = min_y; i < max_y_plus_1; i++) - { - int32 y1 = i - shift_y; - - /* calculate difference in the i-th line */ - - if (i < 0 || i >= h2) - { - /* calculate difference of i1 with white */ - score += compare_with_white(i1->pixels[y1], w1); - } - else if (i < shift_y || i >= shift_y + h1) - { - /* calculate difference of i2 with white */ - score += compare_with_white(i2->pixels[i], w2); - } - else - { - /* calculate difference in a line where the bitmaps overlap */ - score += compare_row(i1->pixels[y1] + min_overlap_x_for_i1, - i2->pixels[i] + min_overlap_x, - overlap_length); - - - /* calculate penalty for the left margin */ - if (min_overlap_x > 0) - score += compare_with_white(i2->pixels[i], min_overlap_x); - else - score += compare_with_white(i1->pixels[y1], min_overlap_x_for_i1); - - /* calculate penalty for the right margin */ - if (max_overlap_x_plus_1 < w2) - { - score += compare_with_white( - i2->pixels[i] + max_overlap_x_plus_1, - w2 - max_overlap_x_plus_1); - } - else - { - score += compare_with_white( - i1->pixels[y1] + max_overlap_x_plus_1_for_i1, - w1 - max_overlap_x_plus_1_for_i1); - - } - } - - if (score >= ceiling) return maxint; - } - return score; -} - -static int32 distance_by_pixeldiff_functions(Image *i1, Image *i2, - int32 (*compare_row)(byte *, byte *, int32), - int32 (*compare_with_white)(byte *, int32), int32 ceiling) -{ - int32 w1, w2, h1, h2; - int32 shift_x, shift_y; /* of i1's coordinate system with respect to i2 */ - - /* make i1 to be narrower than i2 */ - if (i1->width > i2->width) - { - Image *img = i1; - i1 = i2; - i2 = img; - } - - w1 = i1->width; h1 = i1->height; - w2 = i2->width; h2 = i2->height; - - /* (shift_x, shift_y) */ - /* is what should be added to i1's 
coordinates to get i2's coordinates. */ - shift_x = (w2 - w2/2) - (w1 - w1/2); /* center favors right */ - shift_y = h2/2 - h1/2; /* center favors top */ - - shift_x = i2->mass_center_x - i1->mass_center_x; - if (shift_x < 0) - shift_x = (shift_x - MASS_CENTER_QUANT / 2) / MASS_CENTER_QUANT; - else - shift_x = (shift_x + MASS_CENTER_QUANT / 2) / MASS_CENTER_QUANT; - - shift_y = i2->mass_center_y - i1->mass_center_y; - if (shift_y < 0) - shift_y = (shift_y - MASS_CENTER_QUANT / 2) / MASS_CENTER_QUANT; - else - shift_y = (shift_y + MASS_CENTER_QUANT / 2) / MASS_CENTER_QUANT; - - return distance_by_pixeldiff_functions_by_shift( - i1, i2, compare_row, compare_with_white, ceiling, shift_x, shift_y); -} - -#endif - -/* Computing distance by comparing pixels }}} */ -/* inscribed framework penalty counting {{{ */ - -/* (Look at `frames.c' to see what it's all about) */ - -#if USE_PITHDIFF - -/* If the framework of one letter is inscribed into another and vice versa, - * then those letters are probably equivalent. - * That's the idea... - * Counting penalty points here for any pixel - * that's framework in one image and white in the other. 
- */ - -static int32 pithdiff_compare_row(byte *row1, byte *row2, int32 n) -{ - int32 i, s = 0; - for (i = 0; i < n; i++) - { - int32 k = row1[i], l = row2[i]; - if (k == 255) - s += 255 - l; - else if (l == 255) - s += 255 - k; - } - return s; -} - -static int32 pithdiff_compare_with_white(byte *row, int32 n) -{ - int32 i, s = 0; - for (i = 0; i < n; i++) if (row[i] == 255) s += 255; - return s; -} - -static int32 pithdiff_distance(Image *i1, Image *i2, int32 ceiling) -{ - return distance_by_pixeldiff_functions(i1, i2, - &pithdiff_compare_row, &pithdiff_compare_with_white, ceiling); -} - -static int pithdiff_equivalence(Image *i1, Image *i2, double threshold, int32 dpi) -{ - int32 perimeter = i1->width + i1->height + i2->width + i2->height; - double ceiling = pithdiff_veto_threshold * dpi * perimeter / 100; - int32 d = pithdiff_distance(i1, i2, (int32) ceiling); - if (d == maxint) return -1; - if (d < threshold * dpi * perimeter / 100) return 1; - return 0; -} - -#endif /* if USE_PITHDIFF */ - -/* inscribed framework penalty counting }}} */ -/* soft penalty counting {{{ */ - -#if USE_SOFTDIFF - -/* This test scores penalty points for pixels that are different in both images. - * Since every black pixel has a rating of importance, - * the penalty for a pair of corresponding pixels, one black, one white, - * is equal to the rating of the black pixel. 
- */ - -static int32 softdiff_compare_row(byte *row1, byte *row2, int32 n) -{ - int32 i, s = 0; - for (i = 0; i < n; i++) - { - if (!row1[i]) - s += row2[i]; - else if (!row2[i]) - s += row1[i]; - } - return s; -} - -static int32 softdiff_compare_with_white(byte *row, int32 n) -{ - int32 i, s = 0; - for (i = 0; i < n; i++) s += row[i]; - return s; -} - -static int32 softdiff_distance(Image *i1, Image *i2, int32 ceiling) -{ - return distance_by_pixeldiff_functions(i1, i2, - &softdiff_compare_row, &softdiff_compare_with_white, ceiling); -} - -static int softdiff_equivalence(Image *i1, Image *i2, double threshold, int32 dpi) -{ - int32 perimeter = i1->width + i1->height + i2->width + i2->height; - double ceiling = softdiff_veto_threshold * dpi * perimeter / 100; - int32 d = softdiff_distance(i1, i2, (int32) ceiling); - if (d == maxint) return -1; - if (d < threshold * dpi * perimeter / 100) return 1; - return 0; -} - -#endif /* if USE_SOFTDIFF */ - -/* soft penalty counting }}} */ -/* shift signature comparison {{{ */ - -/* Just finding the square of a normal Euclidean distance between vectors - * (but with falloff) - */ - -#if USE_SHIFTDIFF_1 || USE_SHIFTDIFF_2 || USE_SHIFTDIFF_3 -static int shiftdiff_equivalence(byte *s1, byte *s2, double falloff, double veto, double threshold) -{ - int i, delay_before_falloff = 1, delay_counter = 1; - double penalty = 0; - double weight = 1; - - for (i = 1; i < SIGNATURE_SIZE; i++) /* kluge: ignores the first byte */ - { - int difference = s1[i] - s2[i]; - penalty += difference * difference * weight; - if (!--delay_counter) - { - weight *= falloff; - delay_counter = delay_before_falloff <<= 1; - } - } - - if (penalty >= veto * SIGNATURE_SIZE) return -1; - if (penalty <= threshold * SIGNATURE_SIZE) return 1; - return 0; -} -#endif -/* shift signature comparison }}} */ - -#ifndef NO_MINIDJVU -mdjvu_pattern_t mdjvu_pattern_create(mdjvu_bitmap_t bitmap) -{ - int32 w = mdjvu_bitmap_get_width(bitmap); - int32 h = 
mdjvu_bitmap_get_height(bitmap); - mdjvu_pattern_t pattern; - byte **pixels = mdjvu_create_2d_array(w, h); - mdjvu_bitmap_unpack_all(bitmap, pixels); - pattern = mdjvu_pattern_create_from_array(pixels, w, h); - mdjvu_destroy_2d_array(pixels); - return pattern; -} -#endif - -/* Finding mass center {{{ */ - -static void get_mass_center(unsigned char **pixels, int32 w, int32 h, - int32 *pmass_center_x, int32 *pmass_center_y) -{ - double x_sum = 0, y_sum = 0, mass = 0; - int32 i, j; - - for (i = 0; i < h; i++) - { - unsigned char *row = pixels[i]; - for (j = 0; j < w; j++) - { - unsigned char pixel = row[j]; - x_sum += pixel * j; - y_sum += pixel * i; - mass += pixel; - } - } - - *pmass_center_x = (int32) (x_sum * MASS_CENTER_QUANT / mass); - *pmass_center_y = (int32) (y_sum * MASS_CENTER_QUANT / mass); -} - -/* Finding mass center }}} */ - - -MDJVU_IMPLEMENT mdjvu_pattern_t mdjvu_pattern_create_from_array(byte **pixels, int32 w, int32 h)/*{{{*/ -{ - int32 i, mass; - Image *img = MALLOC(Image); - byte *pool = MALLOCV(byte, w * h); - memset(pool, 0, w * h); - - img->width = w; - img->height = h; - - img->pixels = MALLOCV(byte *, h); - for (i = 0; i < h; i++) - img->pixels[i] = pool + i * w; - - mass = 0; - for (i = 0; i < h; i++) - { - int32 j; - for (j = 0; j < w; j++) - if (pixels[i][j]) - { - img->pixels[i][j] = 255; /* i don't remember what for */ - mass += 1; - } - } - img->mass = mass; - - mdjvu_soften_pattern(img->pixels, img->pixels, w, h); - get_mass_center(img->pixels, w, h, - &img->mass_center_x, &img->mass_center_y); - mdjvu_get_gray_signature(img->pixels, w, h, - img->signature, SIGNATURE_SIZE); - mdjvu_get_black_and_white_signature(img->pixels, w, h, - img->signature2, SIGNATURE_SIZE); - return (mdjvu_pattern_t) img; -}/*}}}*/ - -/* Requires `opt' to be non-NULL */ -static int compare_patterns(mdjvu_pattern_t ptr1, mdjvu_pattern_t ptr2,/*{{{*/ - int32 dpi, Options *opt) - -{ - Image *i1 = (Image *) ptr1, *i2 = (Image *) ptr2; - int i, state = 0; /* 0 - 
unsure, 1 - equal unless veto */ - - if (simple_tests(i1, i2)) return -1; - - #if USE_SHIFTDIFF_1 - i = shiftdiff_equivalence(i1->signature, i2->signature, - shiftdiff1_falloff, shiftdiff1_veto_threshold, opt->shiftdiff1_threshold); - if (i == -1) return -1; - state |= i; - #endif - - #if USE_SHIFTDIFF_2 - i = shiftdiff_equivalence(i1->signature2, i2->signature2, - shiftdiff2_falloff, shiftdiff2_veto_threshold, opt->shiftdiff2_threshold); - if (i == -1) return -1; - state |= i; - #endif - - #if USE_SHIFTDIFF_3 - i = shiftdiff_equivalence(i1->signature, i2->signature, - shiftdiff3_falloff, shiftdiff3_veto_threshold, opt->shiftdiff3_threshold); - if (i == -1) return -1; - state |= i; - #endif - - #if USE_PITHDIFF - i = pithdiff_equivalence(i1, i2, opt->pithdiff_threshold, dpi); - if (i == -1) return 0; /* pithdiff has no right to veto at upper level */ - state |= i; - #endif - - #if USE_SOFTDIFF - i = softdiff_equivalence(i1, i2, opt->softdiff_threshold, dpi); - if (i == -1) return 0; /* softdiff has no right to veto at upper level */ - state |= i; - #endif - - return state; -}/*}}}*/ - -MDJVU_IMPLEMENT int mdjvu_match_patterns(mdjvu_pattern_t ptr1, mdjvu_pattern_t ptr2, - int32 dpi, mdjvu_matcher_options_t options) -{ - Options *opt; - int result; - if (options) - opt = (Options *) options; - else - opt = (Options *) mdjvu_matcher_options_create(); - - result = compare_patterns(ptr1, ptr2, dpi, opt); - - if (!options) - mdjvu_matcher_options_destroy((mdjvu_matcher_options_t) opt); - - return result; -} - - -MDJVU_IMPLEMENT void mdjvu_pattern_destroy(mdjvu_pattern_t p)/*{{{*/ -{ - Image *img = (Image *) p; - FREEV(img->pixels[0]); - FREEV(img->pixels); - FREE(img); -}/*}}}*/ diff --git a/jb2cmp/patterns.h b/jb2cmp/patterns.h deleted file mode 100644 index ca8e235..0000000 --- a/jb2cmp/patterns.h +++ /dev/null @@ -1,131 +0,0 @@ -/* minidjvu - library for handling bilevel images with DjVuBitonal support - * - * patterns.h - matching patterns - * - * Copyright (C) 2005 
Ilya Mezhirov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * minidjvu is derived from DjVuLibre (http://djvu.sourceforge.net) - * All over DjVuLibre there is a patent alert from LizardTech - * which I guess I should reproduce (don't ask me what does this mean): - * - * ------------------------------------------------------------------ - * | DjVu (r) Reference Library (v. 3.5) - * | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. - * | The DjVu Reference Library is protected by U.S. Pat. No. - * | 6,058,214 and patents pending. - * | - * | This software is subject to, and may be distributed under, the - * | GNU General Public License, either Version 2 of the license, - * | or (at your option) any later version. The license should have - * | accompanied the software or you may obtain a copy of the license - * | from the Free Software Foundation at http://www.fsf.org . - * | - * | The computer code originally released by LizardTech under this - * | license and unmodified by other parties is deemed "the LIZARDTECH - * | ORIGINAL CODE." 
Subject to any third party intellectual property - * | claims, LizardTech grants recipient a worldwide, royalty-free, - * | non-exclusive license to make, use, sell, or otherwise dispose of - * | the LIZARDTECH ORIGINAL CODE or of programs derived from the - * | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU - * | General Public License. This grant only confers the right to - * | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to - * | the extent such infringement is reasonably necessary to enable - * | recipient to make, have made, practice, sell, or otherwise dispose - * | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to - * | any greater extent that may be necessary to utilize further - * | modifications or combinations. - * | - * | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY - * | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - * | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF - * | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - * +------------------------------------------------------------------ - */ - -#ifndef MDJVU_PATTERNS_H -#define MDJVU_PATTERNS_H - - -/* To get an image ready for comparisons, one have to `prepare' it. - * A prepared image is called a `pattern' here. - */ - -/* the struct itself is not defined in this header */ -typedef struct MinidjvuPattern *mdjvu_pattern_t; - - -/* Allocate a pattern and calculate all necessary information. - * Memory consumption is byte per pixel + constant. - * The pattern would be completely independent on the bitmap given. - * (that is, you can destroy the bitmap immediately) - */ -#ifndef NO_MINIDJVU -MDJVU_FUNCTION mdjvu_pattern_t mdjvu_pattern_create(mdjvu_bitmap_t); -#endif - -/* Same, but create from two-dimensional array. - */ - -MDJVU_FUNCTION mdjvu_pattern_t mdjvu_pattern_create_from_array - (unsigned char **, int32 w, int32 h); - - -/* Destroy the pattern. 
*/ - -MDJVU_FUNCTION void mdjvu_pattern_destroy(mdjvu_pattern_t); - - -typedef struct MinidjvuMatcherOptions *mdjvu_matcher_options_t; - -MDJVU_FUNCTION mdjvu_matcher_options_t mdjvu_matcher_options_create(void); -MDJVU_FUNCTION void mdjvu_set_aggression(mdjvu_matcher_options_t, int level); -MDJVU_FUNCTION void mdjvu_matcher_options_destroy(mdjvu_matcher_options_t); - - -/* Compare patterns. - * Returns - * +1 if images are considered equivalent, - * -1 if they are considered totally different (just to speed up things), - * 0 if unknown, but probably different. - * Exchanging the order of arguments should not change the outcome. - * If you have found that A ~ B and B ~ C, - * then you may assume A ~ C regardless of this function's result. - * - * Options may be NULL. - */ - -MDJVU_FUNCTION int mdjvu_match_patterns(mdjvu_pattern_t, mdjvu_pattern_t, - int32 dpi, - mdjvu_matcher_options_t); - - -/* Auxiliary functions used in pattern matcher (TODO: comment them) */ - -/* `result' and `pixels' may be the same array */ -MDJVU_FUNCTION void mdjvu_soften_pattern(unsigned char **result, - unsigned char **pixels, int32 w, int32 h); - -MDJVU_FUNCTION void mdjvu_get_gray_signature( - unsigned char **data, int32 w, int32 h, - unsigned char *result, int32 size); - -MDJVU_FUNCTION void mdjvu_get_black_and_white_signature( - unsigned char **data, int32 w, int32 h, - unsigned char *result, int32 size); - -#endif /* MDJVU_PATTERNS_H */ diff --git a/jb2tune.cpp b/jb2tune.cpp deleted file mode 100644 index fd4f91a..0000000 --- a/jb2tune.cpp +++ /dev/null @@ -1,397 +0,0 @@ -//C- -*- C++ -*- -//C- ------------------------------------------------------------------- -//C- DjVuLibre-3.5 -//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. -//C- Copyright (c) 2001 AT&T -//C- -//C- This software is subject to, and may be distributed under, the -//C- GNU General Public License, either Version 2 of the license, -//C- or (at your option) any later version. 
The license should have -//C- accompanied the software or you may obtain a copy of the license -//C- from the Free Software Foundation at http://www.fsf.org . -//C- -//C- This program is distributed in the hope that it will be useful, -//C- but WITHOUT ANY WARRANTY; without even the implied warranty of -//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//C- GNU General Public License for more details. -//C- -//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from -//C- Lizardtech Software. Lizardtech Software has authorized us to -//C- replace the original DjVu(r) Reference Library notice by the following -//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): -//C- -//C- ------------------------------------------------------------------ -//C- | DjVu (r) Reference Library (v. 3.5) -//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. -//C- | The DjVu Reference Library is protected by U.S. Pat. No. -//C- | 6,058,214 and patents pending. -//C- | -//C- | This software is subject to, and may be distributed under, the -//C- | GNU General Public License, either Version 2 of the license, -//C- | or (at your option) any later version. The license should have -//C- | accompanied the software or you may obtain a copy of the license -//C- | from the Free Software Foundation at http://www.fsf.org . -//C- | -//C- | The computer code originally released by LizardTech under this -//C- | license and unmodified by other parties is deemed "the LIZARDTECH -//C- | ORIGINAL CODE." Subject to any third party intellectual property -//C- | claims, LizardTech grants recipient a worldwide, royalty-free, -//C- | non-exclusive license to make, use, sell, or otherwise dispose of -//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the -//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU -//C- | General Public License. 
This grant only confers the right to -//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to -//C- | the extent such infringement is reasonably necessary to enable -//C- | recipient to make, have made, practice, sell, or otherwise dispose -//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to -//C- | any greater extent that may be necessary to utilize further -//C- | modifications or combinations. -//C- | -//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY -//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF -//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. -//C- +------------------------------------------------------------------ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#if NEED_GNUG_PRAGMAS -# pragma implementation -#endif - -#include "DjVuGlobal.h" -#include "GException.h" -#include "GSmartPointer.h" -#include "GContainer.h" -#include "GRect.h" -#include "GBitmap.h" -#include "JB2Image.h" - -#include "jb2tune.h" - -#include "jb2cmp/minidjvu.h" -#include "jb2cmp/patterns.h" -#include "jb2cmp/classify.h" - -#include - -#define REFINE_THRESHOLD 21 - - - -// ---------------------------------------- -// UTILITIES - - -// Keep informations for pattern matching -struct MatchData -{ - GP bits; // bitmap pointer - int area; // number of black pixels - int match; // jb2cmp pattern match -}; - - -// Compute the number of black pixels. 
-static int -compute_area(GBitmap *bits) -{ - GBitmap &bitmap = *bits; - int w = bitmap.columns(); - int h = bitmap.rows(); - int black_pixels = 0; - for (int i=0; irows(); - int w = bits->columns(); - GTArray mass(h); - int i, j, m; - int tm = 0; - for (i=0; i0; m--) - if (row[j+m-1]) - break; - mass[i] = m; - tm += m; - } - m = 0; - i = 0; - while (m * 6 < tm * 4) - { - m += mass[i/4]; - i += 1; - } - return i; -} - - -// Fill the MatchData array for lossless compression -static void -compute_matchdata_lossless(JB2Image *jimg, MatchData *lib) -{ - int i; - int nshapes = jimg->get_shape_count(); - for (i=0; iget_shape(i); - lib[i].bits = 0; - lib[i].area = 0; - lib[i].match = -1; - if (! jshp.bits) continue; - if (jshp.userdata & JB2SHAPE_SPECIAL) continue; - lib[i].bits = jshp.bits; - lib[i].area = compute_area(jshp.bits); - } -} - - -// Interface with Ilya's data structures. -static mdjvu_pattern_t -compute_comparable_image(GBitmap *bits) -{ - int w = bits->columns(); - int h = bits->rows(); - GTArray p(h); - for (int i=0; iget_shape_count(); - // Prepare MatchData - GTArray handles(nshapes); - for (i=0; iget_shape(i); - lib[i].bits = 0; - lib[i].area = 0; - lib[i].match = -1; - handles[i] = 0; - if (! jshp.bits) continue; - if (jshp.userdata & JB2SHAPE_SPECIAL) continue; - lib[i].bits = jshp.bits; - lib[i].area = compute_area(jshp.bits); - handles[i] = compute_comparable_image(jshp.bits); - } - // Run Ilya's pattern matcher. - GTArray tags(nshapes); - int maxtag = mdjvu_classify_patterns(handles, tags, nshapes, dpi, options); - // Extract substitutions - GTArray reps(maxtag); - for (i=0; i<=maxtag; i++) - reps[i] = -1; - for (i=0; iget_shape_count(); - // Loop on all shapes - for (int current=0; currentget_shape(current); - // Process substitutions. 
- if (lossy && !(jshp.userdata & JB2SHAPE_LOSSLESS)) - { - int substitute = lib[current].match; - if (substitute >= 0) - { - jshp.parent = substitute; - lib[current].bits = 0; - continue; - } - } - // Leave special shapes alone. - if (! jshp.bits) continue; - if (jshp.userdata & JB2SHAPE_SPECIAL) continue; - // Compute matchdata info - GBitmap &bitmap = *(jshp.bits); - int rows = bitmap.rows(); - int cols = bitmap.columns(); - int best_score = (REFINE_THRESHOLD * rows * cols + 50) / 100; - int black_pixels = lib[current].area; - int closest = -1; - // Search cross-coding buddy - bitmap.minborder(2); - if (best_score < 2) - best_score = 2; - for (int candidate = 0; candidate < current; candidate++) - { - int row, column; - // Access candidate bitmap - if (! lib[candidate].bits) - continue; - GBitmap &cross_bitmap = *lib[candidate].bits; - int cross_cols = cross_bitmap.columns(); - int cross_rows = cross_bitmap.rows(); - // Prune - if (abs (lib[candidate].area - black_pixels) > best_score) - continue; - if (abs (cross_rows - rows) > 2) - continue; - if (abs (cross_cols - cols) > 2) - continue; - // Compute alignment (these are always +1, 0 or -1) - int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2); - int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2); - // Ensure adequate borders - cross_bitmap.minborder (2-cross_col_adjust); - cross_bitmap.minborder (2+cols-cross_cols+cross_col_adjust); - // Count pixel differences (including borders) - int score = 0; - unsigned char *p_row; - unsigned char *p_cross_row; - for (row = -1; row <= rows; row++) - { - p_row = bitmap[row]; - p_cross_row = cross_bitmap[row+cross_row_adjust]; - p_cross_row += cross_col_adjust; - for (column = -1; column <= cols; column++) - if (p_row[column] != p_cross_row[column]) - score ++; - if (score >= best_score) // prune - break; - } - if (score < best_score) - { - best_score = score; - closest = candidate; - } - } - // Decide what to do with the match. 
- if (closest >= 0) - { - // Mark the shape for cross-coding (``soft pattern matching'') - jshp.parent = closest; - // Exact match ==> Substitution - if (best_score == 0) - { - lib[current].match = closest; - lib[current].bits = 0; - } - // ISSUE: CROSS-IMPROVING. When we decide not to do a substitution, - // we can slightly modify the current shape in order to make it - // closer to the matching shape, therefore improving the file size. - // In fact there is a continuity between pure cross-coding and pure - // substitution... - } - } - - // Process shape substitutions - for (int blitno=0; blitnoget_blit_count(); blitno++) - { - JB2Blit *jblt = jimg->get_blit(blitno); - JB2Shape &jshp = jimg->get_shape(jblt->shapeno); - if (lib[jblt->shapeno].bits==0 && jshp.parent>=0) - { - // Locate parent - int parent = jshp.parent; - while (! lib[parent].bits) - parent = lib[parent].match; - // Compute coordinate adjustment. - int cols = jshp.bits->columns(); - int rows = jshp.bits->rows(); - int cross_cols = lib[parent].bits->columns(); - int cross_rows = lib[parent].bits->rows(); - int cross_col_adjust = (cross_cols-cross_cols/2)-(cols-cols/2); - int cross_row_adjust = (cross_rows-cross_rows/2)-(rows-rows/2); - // Refine vertical adjustment - if (lossy) - { - int adjust = compute_baseline(lib[parent].bits) - - compute_baseline(jshp.bits); - if (adjust < 0) - adjust = - (2 - adjust) / 4; - else - adjust = (2 + adjust) / 4; - if (abs(adjust - cross_row_adjust) <= 1 + cols/16 ) - cross_row_adjust = adjust; - } - // Update blit record. - jblt->bottom -= cross_row_adjust; - jblt->left -= cross_col_adjust; - jblt->shapeno = parent; - // Update shape record. 
- jshp.bits = 0; - } - } -} - - - - -// ---------------------------------------- -// LOSSLESS COMPRESSION - - -void -tune_jb2image_lossless(JB2Image *jimg) -{ - int nshapes = jimg->get_shape_count(); - GArray lib(nshapes); - compute_matchdata_lossless(jimg, lib); - tune_jb2image(jimg, lib, false); -} - - -// ---------------------------------------- -// LOSSY COMPRESSION -// Thanks to Ilya Mezhirov. - -void -tune_jb2image_lossy(JB2Image *jimg, int dpi, int aggression) -{ - int nshapes = jimg->get_shape_count(); - GArray lib(nshapes); - - mdjvu_matcher_options_t options = mdjvu_matcher_options_create(); - mdjvu_set_aggression(options, aggression); - compute_matchdata_lossy(jimg, lib, dpi, options); - mdjvu_matcher_options_destroy(options); - - tune_jb2image(jimg, lib, true); -} diff --git a/src/djvulibre b/src/djvulibre new file mode 160000 index 0000000..1a47fd3 --- /dev/null +++ b/src/djvulibre @@ -0,0 +1 @@ +Subproject commit 1a47fd3a6396efcbcba892bb415185ddeb6d3535 diff --git a/djvulibre.h b/src/djvulibre.h similarity index 98% rename from djvulibre.h rename to src/djvulibre.h index e017fd3..356890e 100644 --- a/djvulibre.h +++ b/src/djvulibre.h @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . 
* diff --git a/get_pgm_diff.cpp b/src/get_pgm_diff.cpp similarity index 86% rename from get_pgm_diff.cpp rename to src/get_pgm_diff.cpp index c31837c..272b76c 100644 --- a/get_pgm_diff.cpp +++ b/src/get_pgm_diff.cpp @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . * @@ -28,11 +28,14 @@ using std::vector; -vector > get_image_diff(const GBitmap &image1, const GBitmap &image2) { +vector > get_image_diff(const GBitmap &image1, const GBitmap &image2) +{ vector > result(image1.columns(), vector (image1.rows())); - for (int i = 0; i < result.size(); ++i) { - for (int j = 0; j < result[i].size(); ++j) { + for (int i = 0; i < result.size(); ++i) + { + for (int j = 0; j < result[i].size(); ++j) + { result[i][j] = static_cast(image1[i][j]) - static_cast(image2[i][j]); } } @@ -40,11 +43,14 @@ vector > get_image_diff(const GBitmap &image1, const GBitmap &image2 return result; } -double Lp_norm(const vector > &data, double p = 1.0) { +double Lp_norm(const vector > &data, double p = 1.0) +{ double result(0.0); - for (int i = 0; i < data.size(); ++i) { - for (int j = 0; j < data[i].size(); ++j) { + for (int i = 0; i < data.size(); ++i) + { + for (int j = 0; j < data[i].size(); ++j) + { result += pow(fabs(data[i][j]), p); } } @@ -52,17 +58,20 @@ double Lp_norm(const vector > &data, double p = 1.0) { return pow(result, 1.0 / p); } 
-double Lp_diff(const GBitmap &image1, const GBitmap &image2, double p = 2.0) { +double Lp_diff(const GBitmap &image1, const GBitmap &image2, double p = 2.0) +{ vector > diff = get_image_diff(image1, image2); return Lp_norm(diff, p); } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ GP gimage1 = GBitmap::create(*ByteStream::create(GURL::Filename::UTF8(argv[1]), "rb")); GP gimage2 = GBitmap::create(*ByteStream::create(GURL::Filename::UTF8(argv[2]), "rb")); - if (gimage1->columns() != gimage2->columns() || gimage1->rows() != gimage2->rows()) { + if (gimage1->columns() != gimage2->columns() || gimage1->rows() != gimage2->rows()) + { std::cerr << "Image sizes don't match\n"; return 1; } diff --git a/hystogram_splitter.cpp b/src/hystogram_splitter.cpp similarity index 79% rename from hystogram_splitter.cpp rename to src/hystogram_splitter.cpp index e4754a6..5b89807 100644 --- a/hystogram_splitter.cpp +++ b/src/hystogram_splitter.cpp @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . 
* @@ -38,19 +38,23 @@ using std::array; typedef array Hystogram; -byte get_right_quantile(const Hystogram &hystogram, double level) { +byte get_right_quantile(const Hystogram &hystogram, double level) +{ int need = std::accumulate(hystogram.begin(), hystogram.end(), 0) * level; int current_sum = 0; - for (int i = hystogram.size() - 1; i >= 0; --i) { + for (int i = hystogram.size() - 1; i >= 0; --i) + { current_sum += hystogram[i]; - if (current_sum >= need) { + if (current_sum >= need) + { return i; } } return 0; } -byte get_left_quantile(const Hystogram &hystogram, double level) { +byte get_left_quantile(const Hystogram &hystogram, double level) +{ return get_right_quantile(hystogram, 1.0 - level); } @@ -59,37 +63,47 @@ byte get_left_quantile(const Hystogram &hystogram, double level) { * otherwise it destroys the Earth */ template - void get_values(const GBitmap &q, const double eps_step, vector > *result) { +void get_values(const GBitmap &q, const double eps_step, vector > *result) +{ vector > row_val(q.rows(), vector (q.columns())); { vector > left_val(q.rows(), vector (q.columns())), right_val(q.rows(), vector (q.columns())); - for (int i = 0; i < q.rows(); ++i) { + for (int i = 0; i < q.rows(); ++i) + { left_val[i][0] = q[i][0]; - for (int j = 1; j < q.columns(); ++j) { + for (int j = 1; j < q.columns(); ++j) + { left_val[i][j] = get(left_val[i][j-1] + eps_step, static_cast(q[i][j])); } right_val[i].back() = q[i][q.columns() - 1]; - for (int j = q.columns() - 2; j >= 0; --j) { + for (int j = q.columns() - 2; j >= 0; --j) + { right_val[i][j] = get(right_val[i][j+1] + eps_step, static_cast(q[i][j])); } - for (int j = 0; j < q.columns(); ++j) { + for (int j = 0; j < q.columns(); ++j) + { row_val[i][j] = get(left_val[i][j], right_val[i][j]); } } } vector > up_val(q.rows(), vector (q.columns())), down_val(q.rows(), vector (q.columns())); - for (int j = 0; j < q.columns(); ++j) { + for (int j = 0; j < q.columns(); ++j) + { up_val[0][j] = row_val[0][j]; - for (int i 
= 1; i < q.rows(); ++i) { + for (int i = 1; i < q.rows(); ++i) + { up_val[i][j] = get(row_val[i][j], up_val[i-1][j] + eps_step); } down_val.back()[j] = row_val.back()[j]; - for (int i = q.rows() - 2; i >= 0; --i) { + for (int i = q.rows() - 2; i >= 0; --i) + { down_val[i][j] = get(row_val[i][j], up_val[i+1][j] + eps_step); } } - for (int i = 0; i < q.rows(); ++i) { - for (int j = 0; j < q.columns(); ++j) { + for (int i = 0; i < q.rows(); ++i) + { + for (int j = 0; j < q.columns(); ++j) + { (*result)[i][j] = get(up_val[i][j], down_val[i][j]); } } @@ -98,7 +112,8 @@ template /* * @todo: different scales for black and white */ -void get_image_parts(const GBitmap &image, GBitmap &black_result, GBitmap &white_result, int cell_size) { +void get_image_parts(const GBitmap &image, GBitmap &black_result, GBitmap &white_result, int cell_size) +{ int width = image.columns(), height = image.rows(); int v_cells = height / cell_size + (height % cell_size ? 1 : 0), h_cells = width / cell_size + (width % cell_size ? 
1 : 0); GP gblack_q = GBitmap::create(v_cells, h_cells); @@ -106,14 +121,19 @@ void get_image_parts(const GBitmap &image, GBitmap &black_result, GBitmap &white GBitmap &black_q = *gblack_q; GBitmap &white_q = *gwhite_q; - for (int i = 0; i < v_cells; ++i) { - for (int j = 0; j < h_cells; ++j) { + for (int i = 0; i < v_cells; ++i) + { + for (int j = 0; j < h_cells; ++j) + { Hystogram hystogram; - for (auto &k : hystogram) { + for (auto &k : hystogram) + { k = 0; } - for (int k = 0; k < cell_size && i*cell_size + k < height; ++k) { - for (int l = 0; l < cell_size && j*cell_size + l < width; ++l) { + for (int k = 0; k < cell_size && i*cell_size + k < height; ++k) + { + for (int l = 0; l < cell_size && j*cell_size + l < width; ++l) + { ++hystogram[image[i*cell_size + k][j*cell_size + l]]; } } @@ -134,16 +154,21 @@ void get_image_parts(const GBitmap &image, GBitmap &black_result, GBitmap &white black_result.set_grays(256); white_result.init(v_cells, h_cells); white_result.set_grays(256); - for (int i = 0; i < v_cells; ++i) { - for (int j = 0; j < h_cells; ++j) { + for (int i = 0; i < v_cells; ++i) + { + for (int j = 0; j < h_cells; ++j) + { black_result[i][j] = static_cast (black[i][j]); white_result[i][j] = static_cast (white[i][j]); - if (black_result[i][j] - white_result[i][j] < MIN_COLORS_DIFF) { - if (black_result[i][j] > 255 - MIN_COLORS_DIFF) { + if (black_result[i][j] - white_result[i][j] < MIN_COLORS_DIFF) + { + if (black_result[i][j] > 255 - MIN_COLORS_DIFF) + { white_result[i][j] -= 255 - black_result[i][j]; black_result[i][j] = 255; } - else { + else + { black_result[i][j] += MIN_COLORS_DIFF; } } diff --git a/hystogram_splitter.h b/src/hystogram_splitter.h similarity index 97% rename from hystogram_splitter.h rename to src/hystogram_splitter.h index 5704279..b1143ec 100644 --- a/hystogram_splitter.h +++ b/src/hystogram_splitter.h @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. 
- * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . * @@ -24,8 +24,8 @@ #define HYSTOGRAM_SPLITTER_H_ #include - #include +#include using std::vector; diff --git a/normalize.cpp b/src/normalize.cpp similarity index 90% rename from normalize.cpp rename to src/normalize.cpp index 2977b9a..57d98d2 100644 --- a/normalize.cpp +++ b/src/normalize.cpp @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . 
* @@ -36,23 +36,29 @@ const byte CANONICAL_WHITE_LEVEL = MIN_COLOR; * hope, that black > white * else it can show strange picture, crash, or destroy the Earth */ -byte canonize_level(const byte raw, const byte black, const byte white) { - if (raw >= black) { +byte canonize_level(const byte raw, const byte black, const byte white) +{ + if (raw >= black) + { return CANONICAL_BLACK_LEVEL; } - if (raw <= white) { + if (raw <= white) + { return CANONICAL_WHITE_LEVEL; } return (raw - white) * (CANONICAL_BLACK_LEVEL - CANONICAL_WHITE_LEVEL) / (black - white); } -void normalize_parts(const GBitmap &image, const GBitmap &black, const GBitmap &white, GBitmap &result) { +void normalize_parts(const GBitmap &image, const GBitmap &black, const GBitmap &white, GBitmap &result) +{ int width = image.columns(), height = image.rows(); result.init(height, width); result.set_grays(256); int32 i, j; - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { + for (i = 0; i < height; ++i) + { + for (j = 0; j < width; ++j) + { result[i][j] = canonize_level(image[i][j], black[i][j], white[i][j]); } } @@ -61,7 +67,8 @@ void normalize_parts(const GBitmap &image, const GBitmap &black, const GBitmap & /* * example from http://www.djvuzone.org/open/doc/GBitmapScaler.html */ -void rescale_bitmap(const GBitmap &in, GBitmap &out) { +void rescale_bitmap(const GBitmap &in, GBitmap &out) +{ int w = in.columns(); // Get input width int h = in.rows(); // Get output width int nw = out.columns(); @@ -72,10 +79,12 @@ void rescale_bitmap(const GBitmap &in, GBitmap &out) { gscaler->scale(provided, in, desired, out); // Rescale } -GP get_norm_image(const GBitmap &image, const int iterations) { +GP get_norm_image(const GBitmap &image, const int iterations) +{ GP current = GBitmap::create(image); GP next = GBitmap::create(); - for (int i = 0; i < iterations; ++i) { + for (int i = 0; i < iterations; ++i) + { GP gblack_small = GBitmap::create(); GBitmap &black_small = *gblack_small; GP gwhite_small = 
GBitmap::create(); diff --git a/normalize.h b/src/normalize.h similarity index 98% rename from normalize.h rename to src/normalize.h index ddc8eec..0467fae 100644 --- a/normalize.h +++ b/src/normalize.h @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . * diff --git a/pgm2jb2.cpp b/src/pgm2jb2.cpp similarity index 90% rename from pgm2jb2.cpp rename to src/pgm2jb2.cpp index 2199e90..98ac2a7 100644 --- a/pgm2jb2.cpp +++ b/src/pgm2jb2.cpp @@ -14,7 +14,7 @@ //C- but WITHOUT ANY WARRANTY; without even the implied warranty of //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //C- GNU General Public License for more details. -//C- +//C- //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from //C- Lizardtech Software. Lizardtech Software has authorized us to //C- replace the original DjVu(r) Reference Library notice by the following @@ -35,16 +35,16 @@ //C- | The computer code originally released by LizardTech under this //C- | license and unmodified by other parties is deemed "the LIZARDTECH //C- | ORIGINAL CODE." 
Subject to any third party intellectual property -//C- | claims, LizardTech grants recipient a worldwide, royalty-free, -//C- | non-exclusive license to make, use, sell, or otherwise dispose of -//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the -//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU -//C- | General Public License. This grant only confers the right to -//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to -//C- | the extent such infringement is reasonably necessary to enable -//C- | recipient to make, have made, practice, sell, or otherwise dispose -//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to -//C- | any greater extent that may be necessary to utilize further +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further //C- | modifications or combinations. 
//C- | //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY @@ -124,20 +124,20 @@ #undef MIN #endif - inline int -MIN(int a, int b) -{ - return ( ab ?a :b); +inline int +MAX(int a, int b) +{ + return ( a>b ?a :b); } @@ -150,8 +150,8 @@ MAX(int a, int b) // -------------------------------------------------- // -- A run of black pixels -struct Run -{ +struct Run +{ int y; // vertical coordinate short x1; // first horizontal coordinate short x2; // last horizontal coordinate @@ -159,7 +159,7 @@ struct Run }; // -- A component descriptor -struct CC +struct CC { GRect bb; // bounding box int npix; // number of black pixels @@ -169,33 +169,33 @@ struct CC // -- An image composed of runs -class CCImage +class CCImage { - public: - int height; // Height of the image in pixels - int width; // Width of the image in pixels - GTArray runs; // array of runs - GTArray ccs; // Array of component descriptors - int nregularccs; // Number of regular ccs (set by merge_and_split_ccs) - int largesize; // CCs larger than that are special - int smallsize; // CCs smaller than that are special - int tinysize; // CCs smaller than that may be removed - CCImage(); - void init(int width, int height, int dpi); - void add_single_run(int y, int x1, int x2, int ccid=0); - void add_bitmap_runs(const GBitmap &bm, int offx=0, int offy=0, int ccid=0); - GP get_bitmap_for_cc(int ccid) const; - GP get_jb2image() const; - void make_ccids_by_analysis(); - void make_ccs_from_ccids(); - void erase_tiny_ccs(); - void merge_and_split_ccs(); - void sort_in_reading_order(); +public: + int height; // Height of the image in pixels + int width; // Width of the image in pixels + GTArray runs; // array of runs + GTArray ccs; // Array of component descriptors + int nregularccs; // Number of regular ccs (set by merge_and_split_ccs) + int largesize; // CCs larger than that are special + int smallsize; // CCs smaller than that are special + int tinysize; // CCs smaller than that may be removed + 
CCImage(); + void init(int width, int height, int dpi); + void add_single_run(int y, int x1, int x2, int ccid=0); + void add_bitmap_runs(const GBitmap &bm, int offx=0, int offy=0, int ccid=0); + GP get_bitmap_for_cc(int ccid) const; + GP get_jb2image() const; + void make_ccids_by_analysis(); + void make_ccs_from_ccids(); + void erase_tiny_ccs(); + void merge_and_split_ccs(); + void sort_in_reading_order(); }; // -- Compares runs - static inline bool +static inline bool operator <= (const Run &a, const Run &b) { return (a.y= x1 ) { @@ -288,11 +288,16 @@ CCImage::make_ccids_by_analysis() int oid = runs[p].ccid; while (umap[oid] < oid) oid = umap[oid]; - if ((int)id > umap.hbound()) { + if ((int)id > umap.hbound()) + { id = oid; - } else if (id < oid) { + } + else if (id < oid) + { umap[oid] = id; - } else { + } + else + { umap[id] = oid; id = oid; } @@ -327,7 +332,7 @@ CCImage::make_ccids_by_analysis() // -- Constructs the ``ccs'' array from run's ccids. - void +void CCImage::make_ccs_from_ccids() { int n; @@ -337,7 +342,7 @@ CCImage::make_ccs_from_ccids() for (n=0; n<=runs.hbound(); n++) if (pruns[n].ccid > maxccid) maxccid = runs[n].ccid; - // Renumber ccs + // Renumber ccs GTArray armap(0,maxccid); int *rmap = armap; for (n=0; n<=maxccid; n++) @@ -375,7 +380,7 @@ CCImage::make_ccs_from_ccids() // Compute positions for runs of cc int frun = 0; - for (n=0; n0 { - // truncate the current run + // truncate the current run r.ccid = newccid++; int x = (gridj_start+1)*splitsize; r.x2 = x-1; @@ -511,7 +516,7 @@ CCImage::merge_and_split_ccs() newrun.y = y; newrun.x1 = x; newrun.x2 = x_end; - newrun.ccid = newccid++; + newrun.ccid = newccid++; } } } @@ -522,7 +527,7 @@ CCImage::merge_and_split_ccs() // -- Helps sorting cc - static int +static int top_edges_descending (const void *pa, const void *pb) { if (((CC*) pa)->bb.ymax != ((CC*) pb)->bb.ymax) @@ -534,7 +539,7 @@ top_edges_descending (const void *pa, const void *pb) // -- Helps sorting cc - static int +static int 
left_edges_ascending (const void *pa, const void *pb) { if (((CC*) pa)->bb.xmin != ((CC*) pb)->bb.xmin) @@ -546,7 +551,7 @@ left_edges_ascending (const void *pa, const void *pb) // -- Helps sorting cc - static int +static int integer_ascending (const void *pa, const void *pb) { return ( *(int*)pb - *(int*)pa ); @@ -554,7 +559,7 @@ integer_ascending (const void *pa, const void *pb) // -- Sort ccs in approximate reading order - void +void CCImage::sort_in_reading_order() { if (nregularccs<2) return; @@ -568,7 +573,7 @@ CCImage::sort_in_reading_order() // Subdivide the ccarray list roughly into text lines [LYB] // - Determine maximal top deviation int maxtopchange = width / 40; - if (maxtopchange < 32) + if (maxtopchange < 32) maxtopchange = 32; // - Loop until processing all ccs int ccno = 0; @@ -619,7 +624,7 @@ CCImage::sort_in_reading_order() // -- Creates a bitmap for a particular component -GP +GP CCImage::get_bitmap_for_cc(const int ccid) const { const CC &cc = ccs[ccid]; @@ -641,7 +646,7 @@ CCImage::get_bitmap_for_cc(const int ccid) const // -- Creates a JB2Image with the remaining components -GP +GP CCImage::get_jb2image() const { GP jimg = JB2Image::create(); @@ -670,15 +675,17 @@ CCImage::get_jb2image() const return jimg; } -GP pbm2jb2(const GP &image, int losslevel = 0, int dpi = 300) { +GP pbm2jb2(const GP &image, int losslevel = 0, int dpi = 300) +{ CCImage rimg; rimg.init(image->columns(), image->rows(), dpi); - rimg.add_bitmap_runs(*image); + rimg.add_bitmap_runs(*image); // Component analysis rimg.make_ccids_by_analysis(); // obtain ccids rimg.make_ccs_from_ccids(); // compute cc descriptors - if (losslevel > 0) { + if (losslevel > 0) + { rimg.erase_tiny_ccs(); // clean } rimg.merge_and_split_ccs(); // reorganize weird ccs diff --git a/pgm2jb2.h b/src/pgm2jb2.h similarity index 98% rename from pgm2jb2.h rename to src/pgm2jb2.h index 5fa5701..9dfbcae 100644 --- a/pgm2jb2.h +++ b/src/pgm2jb2.h @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * 
This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . * diff --git a/simpledjvu.cpp b/src/simpledjvu.cpp similarity index 70% rename from simpledjvu.cpp rename to src/simpledjvu.cpp index aead1ff..4ea60dd 100644 --- a/simpledjvu.cpp +++ b/src/simpledjvu.cpp @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. - * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . 
* @@ -38,10 +38,12 @@ using std::string; const byte THRESHOLD_LEVEL = 128; -struct Keys { +struct Keys +{ bool include_bg; bool include_fg; int mask_mul; + int dpi; bool use_normalized; int normalize_iters; int threshold_level; @@ -52,12 +54,14 @@ struct Keys { include_bg(true), include_fg(true), mask_mul(2), + dpi(300), use_normalized(false), normalize_iters(200), threshold_level(128), cjb2_loss_level(1), slices_bg({74, 89, 99}), - slices_fg({89}) { + slices_fg({89}) + { } }; @@ -65,114 +69,145 @@ struct Keys { /* * it should be const GBitmap, but for unknown reason GBitmap::save_pgm is not declared as const */ -void save(const GP gimage, const char *fname, bool pgm = true) { - if (pgm) { +void save(const GP gimage, const char *fname, bool pgm = true) +{ + if (pgm) + { gimage->save_pgm(*ByteStream::create(GURL::Filename::UTF8(fname), "wb")); } - else { + else + { gimage->save_pbm(*ByteStream::create(GURL::Filename::UTF8(fname), "wb")); } } enum Chunk { BACKGROUND, FOREGROUND}; -GP make_chunk_mask(const GBitmap &mask, Chunk chunk) { +GP make_chunk_mask(const GBitmap &mask, Chunk chunk) +{ GP result = GBitmap::create(mask.rows(), mask.columns()); result->set_grays(2); int ok_color = chunk == BACKGROUND; - for (int i = 0; i < mask.rows(); ++i) { - for (int j = 0; j < mask.columns(); ++j) { + for (int i = 0; i < mask.rows(); ++i) + { + for (int j = 0; j < mask.columns(); ++j) + { (*result)[i][j] = mask[i][j] == ok_color || - (i > 0 && mask[i-1][j] == ok_color) || - (i < mask.rows() - 1 && mask[i+1][j] == ok_color) || - (j > 0 && mask[i][j-1] == ok_color) || - (j < mask.columns() - 1 && mask[i][j+1] == ok_color); + (i > 0 && mask[i-1][j] == ok_color) || + (i < mask.rows() - 1 && mask[i+1][j] == ok_color) || + (j > 0 && mask[i][j-1] == ok_color) || + (j < mask.columns() - 1 && mask[i][j+1] == ok_color); } } return result; } -void print_help() { +void print_help() +{ std::cerr - << "Usage: simpledjvu [options] input.pgm output.djvu\n" - << "where options =\n" - << 
"\t-nobg\n" - << "\t-nofg\n" - << "\t-mask_mul n\n" - << "\t-use_normalized\n" - << "\t-normalize_iters n\n" - << "\t-threshold_level n\n" - << "\t-cjb2_loss_level n\n" - << "\t-slices_bg n1,n2,..\n" - << "\t-slices_fg n1,n2,...\n" - ; + << "Usage: simpledjvu [options] input.pgm output.djvu\n" + << "where options =\n" + << "\t-nobg\n" + << "\t-nofg\n" + << "\t-mask_mul n\n" + << "\t-dpi n\n" + << "\t-use_normalized\n" + << "\t-normalize_iters n\n" + << "\t-threshold_level n\n" + << "\t-cjb2_loss_level n\n" + << "\t-slices_bg n1,n2,..\n" + << "\t-slices_fg n1,n2,...\n" + ; } -bool parse_keys(int argc, char *argv[], Keys *keys, char **input, char **output) { - if (argc <= 2) { +bool parse_keys(int argc, char *argv[], Keys *keys, char **input, char **output) +{ + if (argc <= 2) + { std::cerr << "Not enough arguments\n"; return false; } (*input) = argv[argc - 2]; (*output) = argv[argc - 1]; - for (int i = 1; i < argc - 2; ++i) { - if (argv[i][0] != '-') { + for (int i = 1; i < argc - 2; ++i) + { + if (argv[i][0] != '-') + { std::cerr << "Wrong option format: " << argv[i] << '\n'; return false; } string arg = argv[i]; - if (arg == "-nobg") { + if (arg == "-nobg") + { keys->include_bg = false; } - else if (arg == "-nofg") { + else if (arg == "-nofg") + { keys->include_fg = false; } - else if (arg == "-use_normalized") { + else if (arg == "-use_normalized") + { keys->use_normalized = true; } - else if (arg == "-mask_mul" || arg == "-normalize_iters" || arg == "-threshold_level" || arg == "-cjb2_loss_level") { + else if (arg == "-mask_mul" || arg == "-dpi" || arg == "-normalize_iters" || arg == "-threshold_level" || arg == "-cjb2_loss_level") + { ++i; int n; char *endptr; n = strtol(argv[i], &endptr, 10); - if (*endptr) { + if (*endptr) + { std::cerr << "Bad number: " << argv[i] << '\n'; return false; } - if (arg == "-mask_mul") { + if (arg == "-mask_mul") + { keys->mask_mul = n; } - else if (arg == "-normalize_iters") { + else if (arg == "-dpi") + { + keys->dpi = n; + } + 
else if (arg == "-normalize_iters") + { keys->normalize_iters = n; } - else if (arg == "-threshold_level") { + else if (arg == "-threshold_level") + { keys->threshold_level = n; } - else if (arg == "-cjb2_loss_level") { + else if (arg == "-cjb2_loss_level") + { keys->cjb2_loss_level = n; } } - else if (arg == "-slices_bg" || arg == "-slices_fg") { + else if (arg == "-slices_bg" || arg == "-slices_fg") + { ++i; char *nptr = argv[i], *endptr; vector ns; - while (*nptr) { + while (*nptr) + { int n = strtol(nptr, &endptr, 10); - if (endptr == nptr || *endptr != ',' && *endptr != 0) { + if (endptr == nptr || *endptr != ',' && *endptr != 0) + { std::cerr << "Bad numbers: " << argv[i] << '\n'; return false; } ns.push_back(n); nptr = *endptr ? endptr + 1 : endptr; } - if (arg == "-slices_bg") { + if (arg == "-slices_bg") + { keys->slices_bg = ns; } - else if (arg == "-slices_fg") { + else if (arg == "-slices_fg") + { keys->slices_fg = ns; } } - else { + else + { std::cerr << "Unknown option: " << argv[i] << '\n'; return false; } @@ -186,21 +221,26 @@ bool parse_keys(int argc, char *argv[], Keys *keys, char **input, char **output) * * @todo: understand, how does it work */ -void write_part_to_djvu(const GBitmap &image, const vector &slices, const GP &gmask, IFFByteStream &iff, Chunk chunk) { +void write_part_to_djvu(const GBitmap &image, const vector &slices, const GP &gmask, IFFByteStream &iff, Chunk chunk) +{ GP iw = IW44Image::create_encode(image, gmask); vector parms(slices.size()); - for (int i = 0; i < parms.size(); ++i) { + for (int i = 0; i < parms.size(); ++i) + { parms[i].slices = slices[i]; // is it necessary? 
parms[i].bytes = 0; parms[i].decibels = 0; } - for (const auto& parm : parms) { - if (chunk == BACKGROUND) { + for (const auto& parm : parms) + { + if (chunk == BACKGROUND) + { iff.put_chunk("BG44"); } - else { + else + { iff.put_chunk("FG44"); } iw->encode_chunk(iff.get_bytestream(), parm); @@ -208,10 +248,12 @@ void write_part_to_djvu(const GBitmap &image, const vector &slices, const G } } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ Keys keys; char *input, *output; - if (!parse_keys(argc, argv, &keys, &input, &output)) { + if (!parse_keys(argc, argv, &keys, &input, &output)) + { print_help(); return 1; } @@ -237,7 +279,7 @@ int main(int argc, char *argv[]) { GP ginfo = DjVuInfo::create(); ginfo->width = gmask->get_width(); ginfo->height = gmask->get_height(); - ginfo->dpi = 300; + ginfo->dpi = keys.dpi; iff.put_chunk("INFO"); ginfo->encode(*iff.get_bytestream()); @@ -250,17 +292,21 @@ int main(int argc, char *argv[]) { gnormalized_small->binarize_grays(THRESHOLD_LEVEL); GP gbetter_image; - if (keys.use_normalized) { + if (keys.use_normalized) + { gbetter_image = get_norm_image(*gimage, 2); } - else { + else + { gbetter_image = gimage; } - if (keys.include_fg) { + if (keys.include_fg) + { write_part_to_djvu(*gbetter_image, keys.slices_fg, make_chunk_mask(*gnormalized_small, FOREGROUND), iff, FOREGROUND); } - if (keys.include_bg) { + if (keys.include_bg) + { write_part_to_djvu(*gbetter_image, keys.slices_bg, make_chunk_mask(*gnormalized_small, BACKGROUND), iff, BACKGROUND); } diff --git a/types.h b/src/types.h similarity index 98% rename from types.h rename to src/types.h index 376a842..e8beee2 100644 --- a/types.h +++ b/src/types.h @@ -4,17 +4,17 @@ * Copyright 2012, Mikhail Dektyarev * * This file is part of Simpledjvu. 
- * + * * Simpledjvu is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Simpledjvu is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Simpledjvu. If not, see . *