diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f5b8d4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +/data +/data.h +/playback +/playback.exe +/vid.mp4 diff --git a/readme.md b/README.md similarity index 58% rename from readme.md rename to README.md index 18e3d78..c688275 100644 --- a/readme.md +++ b/README.md @@ -1,8 +1,10 @@ # Color Video to Text Conversion -A few tools to convert video and images into ASCII art in an ANSI terminal. These tools support color output using the ANSI 256 color set, as well as the creation of a self-contained playback executable for video converted to text, with compression able to fit 4 minutes of 80 column 15 FPS video onto a single floppy disk! +A few tools to convert video and images into ASCII art in an ANSI terminal. These tools support color output using the ANSI 256 color set, +as well as the creation of a self-contained playback executable for video converted to text, with compression able to fit 4 minutes of +80 column 15 FPS video onto a single floppy disk! - ## Check out [this video](https://www.youtube.com/watch?v=uGoR3ZYZqjc) for more information and to see sample output for video to text conversion. +## Check out [this video](https://www.youtube.com/watch?v=uGoR3ZYZqjc) for more information and to see sample output for video to text conversion. ![Screenshot](screenshot.png) @@ -10,9 +12,12 @@ A sample image converted to text and printed to the terminal. --- -**Note:** To run these programs, you will need Python 3 installed, alongside NumPy and OpenCV (for image io). +**Note:** To run these programs, you will need Python 3 installed, alongside NumPy and OpenCV (for image I/O). + +Install these dependencies by running `pip install -r requirements.txt`. ## Displaying Images as Text + The python script imageToTextColor.py will print an image file provided as an argument as text to the terminal. 
`python3 imgToTextColor.py your_image_here.jpg` @@ -20,22 +25,25 @@ The python script imageToTextColor.py will print an image file provided as an ar The width of the output can be configured in the header of the python file. ## Displaying Videos as Text + The python script videoToTextColor.py will play back a video provided as an argument as text to the terminal. `python3 videoToTextColor.py your_video_here.mp4` The width and aspect ratio of the output can be configured in the header of the python file. - ## Creating Video Playback Executables -The provided makefile allows building programs which will play the compressed text encoding of the video stored in the executable. The target video should be named `vid.mp4`, otherwise the path to the video can be changed in the header of convert.py. -To build for Linux targets (using GCC) run +The provided makefile allows building programs which will play the compressed text encoding of the video stored in the executable. +The target video should be named `vid.mp4`, otherwise the path to the video can be changed in the header of convert.py. + +To build for Linux targets (using GCC) run: `make playback` -Otherwise to build for Windows targets (using MinGW) run +Otherwise to build for Windows targets (using MinGW) run: `make playback.exe` -Other aspects of the video encoding, such as character width and framerate can be adjusted in both convert.py and playback.c. **Be sure to update these parameters in both files.** +Other aspects of the video encoding, such as character width and framerate can be adjusted in both convert.py and playback.c. 
+**Be sure to update these parameters in both files.** diff --git a/convert.py b/convert.py index e829004..b0da984 100644 --- a/convert.py +++ b/convert.py @@ -1,246 +1,278 @@ import cv2 import numpy as np -import time import multiprocessing from joblib import Parallel, delayed -aspect_ratio = 16 / 9 +ASPECT_RATIO = 16 / 9 -#Dimensions of the output in terminal characters -width = 80 -height = int(width / (2 * aspect_ratio)) +# Dimensions of the output in terminal characters +WIDTH = 80 +HEIGHT = int(WIDTH / (2 * ASPECT_RATIO)) # Framerate of the source and output video -src_FPS = 30 -dest_FPS = 15 +SRC_FPS = 30 +DEST_FPS = 15 -num_cores = multiprocessing.cpu_count() +NUM_CORES = multiprocessing.cpu_count() cap = cv2.VideoCapture('vid.mp4') frames = [] +# Our characters, and their approximate brightness values +CHARSET = ' ,(S#g@' +LEVELS = [0.000, 1.060, 2.167, 3.036, 3.977, 4.730, 6.000] +NUMCHARS = len(CHARSET) -#Our characters, and their approximate brightness values -charSet = " ,(S#g@" -levels = [0.000, 1.060, 2.167, 3.036, 3.977, 4.730, 6.000] -numChrs = len(charSet) - - -# Converts a greyscale video frame into a dithered 7-color frame -def processFrame(scaled): - reduced = scaled * 6. 
/ 255 - - out = np.zeros((height, width), dtype= np.int8) - - line = '' - for y in range(height): - for x in range(width): - level = min(6, max(0, int(reduced[y, x]))) - - error = reduced[y, x] - levels[level] - - err16 = error / 16 - - if (x + 1) < width: - reduced[y , x + 1] += 7 * err16 - if (y + 1) < height: - reduced[y + 1, x ] += 5 * err16 - - if (x + 1) < width: - reduced[y + 1, x + 1] += 1 * err16 - if (x - 1) > 0: - reduced[y + 1, x - 1] += 3 * err16 - - out[y, x] = level - - return out - -# Prints out a frame in ASCII -def toStr(frame): - line = '' - - for y in range(height): - for x in range(width): - line += charSet[frame[y, x]] - line += '\n' - - return line - -# Compute the prediction matrix for each character combination -# Each row in this matrix corresponds with a character, and lists -# in decreasing order, the next most likely character to follow this one -# -# We also convert the provided frame to this new markov encoding, and provide -# the count of each prediction rank to be passed to the huffman encoding -def computeMarkov(frame): - mat = np.zeros((numChrs, numChrs)).astype(np.uint16) - - h, w = frame.shape - - prevChar = 0 - - for y in range(h): - for x in range(w): - char = frame[y, x] - - mat[prevChar, char] += 1 - - prevChar = char - - ranks = np.zeros((numChrs, numChrs)).astype(np.uint16) - for i in range(numChrs): - ranks[i][mat[i].argsort()] = 6 - np.arange(numChrs) - - cnt = np.zeros(numChrs).astype(np.uint16) - - out = np.zeros_like(frame) - prevChar = 0 - for y in range(h): - for x in range(w): - char = frame[y, x] - - out[y, x] = ranks[prevChar, char] - cnt[out[y, x]] += 1 - - prevChar = char - - return out, ranks, cnt - -# Computes Huffman encodings based on the counts of each number in the frame -def computeHuffman(cnts): - codes = [] - sizes = [] - tree = [] - for i in range(len(cnts)): - codes.append('') - sizes.append((cnts[i], [i], i)) - tree.append((i, i)) - - sizes = sorted(sizes, reverse = True) - - while(len(sizes) > 1): 
- # Take the two least frequent entries - right = sizes.pop() - left = sizes.pop() - - (lnum, lchars, ltree) = left - (rnum, rchars, rtree) = right - - # Add a new tree node - tree.append((ltree, rtree)) - - # Update the encodings - for char in lchars: - codes[char] = '0' + codes[char] - for char in rchars: - codes[char] = '1' + codes[char] - - # Merge these entries - new = (lnum + rnum, lchars + rchars, len(tree) - 1) - - # Find the position in the list to inser these entries - for insertPos in range(len(sizes) + 1): - # Append if we hit the end of the list - if(insertPos == len(sizes)): - sizes.append(new) - break - - cnt, _, _ = sizes[insertPos] - - if(cnt <= lnum + rnum): - sizes.insert(insertPos, new) + +def process_frame(scaled): + ''' + Converts a greyscale video frame into a dithered 7-color frame + ''' + + reduced = scaled * 6. / 255 + + out = np.zeros((HEIGHT, WIDTH), dtype=np.int8) + + for y in range(HEIGHT): + for x in range(WIDTH): + level = min(6, max(0, int(reduced[y, x]))) + + error = reduced[y, x] - LEVELS[level] + + err16 = error / 16 + + if (x + 1) < WIDTH: + reduced[y, x + 1] += 7 * err16 + if (y + 1) < HEIGHT: + reduced[y + 1, x] += 5 * err16 + + if (x + 1) < WIDTH: + reduced[y + 1, x + 1] += 1 * err16 + if (x - 1) > 0: + reduced[y + 1, x - 1] += 3 * err16 + + out[y, x] = level + + return out + + +def frame_to_str(frame): + ''' + Prints out a frame in ASCII + ''' + + line = '' + + for y in range(HEIGHT): + for x in range(WIDTH): + line += CHARSET[frame[y, x]] + line += '\n' + + return line + + +def compute_markov(frame): + ''' + Compute the prediction matrix for each character combination + Each row in this matrix corresponds with a character, and lists + in decreasing order, the next most likely character to follow + this one + + We also convert the provided frame to this new markov encoding, + and provide the count of each prediction rank to be passed + to the huffman encoding + ''' + + mat = np.zeros((NUMCHARS, NUMCHARS)).astype(np.uint16) + 
+ h, w = frame.shape + + prevChar = 0 + + for y in range(h): + for x in range(w): + char = frame[y, x] + + mat[prevChar, char] += 1 + + prevChar = char + + ranks = np.zeros((NUMCHARS, NUMCHARS)).astype(np.uint16) + for i in range(NUMCHARS): + ranks[i][mat[i].argsort()] = 6 - np.arange(NUMCHARS) + + cnt = np.zeros(NUMCHARS).astype(np.uint16) + + out = np.zeros_like(frame) + prevChar = 0 + for y in range(h): + for x in range(w): + char = frame[y, x] + + out[y, x] = ranks[prevChar, char] + cnt[out[y, x]] += 1 + + prevChar = char + + return out, ranks, cnt + + +def compute_huffman(cnts): + ''' + Computes Huffman encodings based on the counts of + each number in the frame + ''' + + codes = [] + sizes = [] + tree = [] + for i in range(len(cnts)): + codes.append('') + sizes.append((cnts[i], [i], i)) + tree.append((i, i)) + + sizes = sorted(sizes, reverse=True) + + while len(sizes) > 1: + # Take the two least frequent entries + right = sizes.pop() + left = sizes.pop() + + (lnum, lchars, ltree) = left + (rnum, rchars, rtree) = right + + # Add a new tree node + tree.append((ltree, rtree)) + + # Update the encodings + for char in lchars: + codes[char] = '0' + codes[char] + for char in rchars: + codes[char] = '1' + codes[char] + + # Merge these entries + new = (lnum + rnum, lchars + rchars, len(tree) - 1) + + # Find the position in the list to insert these entries + for insertPos in range(len(sizes) + 1): + # Append if we hit the end of the list + if insertPos == len(sizes): + sizes.append(new) + break + + cnt, _, _ = sizes[insertPos] + + if cnt <= lnum + rnum: + sizes.insert(insertPos, new) + break + + return codes, tree + + +def convert_huffman(markov_frame, codes): + ''' + Take a markov frame and an array of huffman encodings, + and create an array of bytes corresponding to + the compressed frame + ''' + + out = '' + + h, w = markov_frame.shape + + for y in range(h): + for x in range(w): + out = out + codes[markov_frame[y, x]] + + # Pad this bit-string to be byte-aligned + padding
= (8 - (len(out) % 8)) % 8 + out += '0' * padding + + # Convert each octet to a char + compressed = [] + for i in range(0, len(out), 8): + byte = out[i:i+8] + char = 0 + for bit in range(8): + char *= 2 + if byte[bit] == '1': + char += 1 + + compressed.append(char) + + return compressed + + +def encode_matrix(ranks): + ''' + Converts a rank matrix into a binary format to be stored in + the output file + ''' + + out = [] + + for row in ranks: + encoding = 0 + + fact = 1 + idxs = list(range(len(CHARSET))) + + for rank in range(len(CHARSET)): + rank = list(row).index(rank) + encoding += idxs.index(rank) * fact + + fact *= len(idxs) + idxs.remove(rank) + + low_byte = int(encoding) % 256 + high_byte = (encoding - low_byte) // 256 + + out.append(high_byte) + out.append(low_byte) + + return out + + +def encode_tree(tree): + ''' + Converts the huffman tree into a binary format to be stored in + the output file + ''' + + tree = tree[len(CHARSET):] + + out = [] + + for (l, r) in tree: + out.append(l * 16 + r) + + return out + + +# Load all frames into memory, then convert them to greyscale +# and resize them to our terminal dimensions +vid_frames = [] +while cap.isOpened(): + if (len(vid_frames) % 500) == 0: + print(f'Loading frame {len(vid_frames)}') + + # Skip frames to reach target framerate + for i in range(int(SRC_FPS / DEST_FPS)): + ret, frame = cap.read() + + if frame is None: break - return codes, tree - -# Take a markov frame and an array of huffman encodings, and create an array of -# bytes corresponding to the compressed frame -def convertHuffman(markovFrame, codes): - out = '' - - h, w = frame.shape - - for y in range(h): - for x in range(w): - out = out + codes[markovFrame[y, x]] - - # Pad this bit-string to be byte-aligned - padding = (8 - (len(out) % 8)) % 8 - out += ("0" * padding) - - # Convert each octet to a char - compressed = [] - for i in range(0, len(out), 8): - byte = out[i:i+8] - char = 0 - for bit in range(8): - char *= 2 - if byte[bit] == "1": - 
char += 1 - - compressed.append(char) - - return compressed - -# Converts a rank matrix into a binary format to be stored in the output file -def encodeMatrix(ranks): - out = [] - - for row in ranks: - encoding = 0 - - fact = 1 - idxs = list(range(len(charSet))) - - for rank in range(len(charSet)): - rank = list(row).index(rank) - encoding += idxs.index(rank) * fact - - fact *= len(idxs) - idxs.remove(rank) - - low_byte = int(encoding) % 256 - high_byte = (encoding - low_byte) // 256 - - out.append(high_byte) - out.append(low_byte) - - return out - -# Converts the huffman tree into a binary format to be stored in the output file -def encodeTree(tree): - tree = tree[len(charSet):] - - out = [] - - for (l, r) in tree: - out.append(l * 16 + r) - - return out - -# Load all frames into memory, then convert them to greyscale and resize them to -# our terminal dimensions -vidFrames = [] -while(cap.isOpened()): - if (len(vidFrames) % 500) == 0: - print('Loading frame %i' % len(vidFrames)) - - # Skip frames to reach target framerate - for i in range(int(src_FPS / dest_FPS)): - ret, frame = cap.read() - - if frame is None: - break - - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - scaled = cv2.resize(gray, (width, height)) - - vidFrames.append(scaled) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + scaled = cv2.resize(gray, (WIDTH, HEIGHT)) + + vid_frames.append(scaled) # Compute dithering for all frames in parallel print('Dithering Frames') -frames = Parallel(n_jobs=num_cores)(delayed(processFrame)(i) for i in vidFrames) +frames = Parallel(n_jobs=NUM_CORES)( + delayed(process_frame)(i) for i in vid_frames) # Compute markov and huffman encoding for all frames print('Encoding Frames') @@ -248,25 +280,25 @@ def encodeTree(tree): size = 0 with open('data', 'wb') as filehandle: - for frame in frames: - markovFrame, ranks, cnts = computeMarkov(frame) + for frame in frames: + markov_frame, ranks, cnts = compute_markov(frame) - codes, tree = computeHuffman(cnts) - chars = 
convertHuffman(markovFrame, codes) + codes, tree = compute_huffman(cnts) + chars = convert_huffman(markov_frame, codes) - matrixData = encodeMatrix(ranks) - treeData = encodeTree(tree) + matrix_data = encode_matrix(ranks) + tree_data = encode_tree(tree) - filehandle.write(bytearray(matrixData)) - filehandle.write(bytearray(treeData)) - filehandle.write(bytearray(chars)) + filehandle.write(bytearray(matrix_data)) + filehandle.write(bytearray(tree_data)) + filehandle.write(bytearray(chars)) - size += len(matrixData) + len(treeData) + len(chars) + size += len(matrix_data) + len(tree_data) + len(chars) # Print the size of the output file in human-readable form if size > 1048576: - print('%.1f MB' % (size / 1048576)) + print(f'{size / 1048576:.1f} MB') elif size > 1024: - print('%.1f KB' % (size / 1024)) + print(f'{size / 1024:.1f} kB') else: - print('%i B' % (size)) \ No newline at end of file + print(f'{size} B') diff --git a/imgToTextColor.py b/imgToTextColor.py index 23167f5..73a21fc 100644 --- a/imgToTextColor.py +++ b/imgToTextColor.py @@ -1,62 +1,68 @@ import numpy as np -import cv2 import pickle -import sys -#Width of the output in terminal characters -width = 80 -height = 1 +# Load in color lookup table data +with open('colors.pkl', 'rb') as f: + LERPED = pickle.load(f) +LUT = np.load('LUT.npy') -#Our characters, and their approximate brightness values -charSet = " ,(S#g@@g#S(, " +def set_color(bg, fg): + ''' + Generates a character sequence to set the foreground + and background colors + ''' + return f'\u001b[48;5;{bg};38;5;{fg}m' -# Generates a character sequence to set the foreground and background colors -def setColor (bg, fg): - return "\u001b[48;5;%s;38;5;%sm" % (bg, fg) -black = setColor(16, 16) +def convert_img(img, charset=' ,(S#g@@g#S(, ', width=80, height=1): + ''' + Convert an RGB image to a stream of text with ANSI color codes + ''' -# Load in color lookup table data -lerped = pickle.load( open( "colors.pkl", "rb" ) ) -LUT = np.load("LUT.npy") - -# 
Convert an RGB image to a stream of text with ANSI color codes -def convertImg(img): - line = "" - - for row in img: - for color in row: - color = np.round(color).astype(int) - - b, g, r = color[0], color[1], color[2] - - # Lookup the color index in the RGB lookup table - idx = LUT[b, g, r] - - # Get the ANSI color codes and lerp character - bg, fg, lerp, rgb = lerped[idx] - - char = charSet[lerp] - - line += "%s%c" % (setColor(bg, fg), char) - # End each line with a black background to avoid color fringe - line += "%s\n" % black - - # Move the cursor back to the top of the frame to prevent rolling - line += "\u001b[%iD\u001b[%iA" % (width, height + 1) - return line - -if len(sys.argv) == 2: - img = cv2.imread(sys.argv[1]) - - # Match the aspect ratio to that of the provided image - src_height, src_width, _ = img.shape - - aspect_ratio = src_width / src_height - height = int(width / (2 * aspect_ratio)) - - img = cv2.resize(img, (width, height)) - print(convertImg(img)) -else: - print("Expected image file as argument.") \ No newline at end of file + line = '' + + for row in img: + for color in row: + color = np.round(color).astype(int) + + b, g, r = color[0], color[1], color[2] + + # Lookup the color index in the RGB lookup table + idx = LUT[b, g, r] + + # Get the ANSI color codes and lerp character + bg, fg, lerp, rgb = LERPED[idx] + + char = charset[lerp] + + line += set_color(bg, fg) + char + # End each line with a black background to avoid color fringe + line += '\u001b[48;5;16;38;5;16m\n' + + # Move the cursor back to the top of the frame to prevent rolling + line += f'\u001b[{width}D\u001b[{height + 1}A' + return line + + +if __name__ == '__main__': + import cv2 + import sys + + # Width of the output in terminal characters + WIDTH = 80 + HEIGHT = 1 + + if len(sys.argv) == 2: + img = cv2.imread(sys.argv[1]) + + # Match the aspect ratio to that of the provided image + src_height, src_width, _ = img.shape + + aspect_ratio = src_width / src_height + HEIGHT = 
int(WIDTH / (2 * aspect_ratio)) + + img = cv2.resize(img, (WIDTH, HEIGHT)) + print(convert_img(img, width=WIDTH, height=HEIGHT)) + else: + print('Expected image file as argument.') diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8e88864 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +joblib +numpy +opencv-python diff --git a/videoToTextColor.py b/videoToTextColor.py index c891ae3..6c6304f 100644 --- a/videoToTextColor.py +++ b/videoToTextColor.py @@ -1,68 +1,29 @@ -import numpy as np -import cv2 -import pickle -import sys +from imgToTextColor import convert_img -aspect_ratio = 16 / 9 +if __name__ == '__main__': + import cv2 + import sys -#Dimensions of the output in terminal characters -width = 80 -height = int(width / (2 * aspect_ratio)) + ASPECT_RATIO = 16 / 9 + # Dimensions of the output in terminal characters + WIDTH = 80 + HEIGHT = int(WIDTH / (2 * ASPECT_RATIO)) + if len(sys.argv) == 2: + if sys.argv[1].startswith("cam:"): + cap = cv2.VideoCapture(int(sys.argv[1][4:])) + else: + cap = cv2.VideoCapture(sys.argv[1]) + while cap.isOpened(): + ret, frame = cap.read() -#Our characters, and their approximate brightness values -charSet = " ,(S#g@@g#S(, " - -# Generates a character sequence to set the foreground and background colors -def setColor (bg, fg): - return "\u001b[48;5;%s;38;5;%sm" % (bg, fg) - -black = setColor(16, 16) - -# Load in color lookup table data -lerped = pickle.load( open( "colors.pkl", "rb" ) ) -LUT = np.load("LUT.npy") - -# Convert an RGB image to a stream of text with ANSI color codes -def convertImg(img): - line = "" - - for row in img: - for color in row: - color = np.round(color).astype(int) - - b, g, r = color[0], color[1], color[2] - - # Lookup the color index in the RGB lookup table - idx = LUT[b, g, r] - - # Get the ANSI color codes and lerp character - bg, fg, lerp, rgb = lerped[idx] - - char = charSet[lerp] - - line += "%s%c" % (setColor(bg, fg), char) - # End each line with a black background to 
avoid color fringe - line += "%s\n" % black - - # Move the cursor back to the top of the frame to prevent rolling - line += "\u001b[%iD\u001b[%iA" % (width, height + 1) - return line - - -if len(sys.argv) == 2: - cap = cv2.VideoCapture(sys.argv[1]) - - while(cap.isOpened()): - ret, frame = cap.read() - - if frame is None: - break - - img = cv2.resize(frame, (width, height)) - print(convertImg(img)) -else: - print("Expected video file as argument.") + if frame is None: + break + img = cv2.resize(frame, (WIDTH, HEIGHT)) + print(convert_img(img, width=WIDTH, height=HEIGHT)) + else: + print('Expected video file or webcam ID ("cam:n", ' + 'where n is the camera index, starting with 0) as argument.')