diff --git a/.gitignore b/.gitignore index 7b004e5..9221102 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +#Data files +*.csv +*.json +*.ndjson +*.db + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -191,4 +198,9 @@ cython_debug/ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data # refer to https://docs.cursor.com/context/ignore-files .cursorignore -.cursorindexingignore \ No newline at end of file +.cursorindexingignore + +# Dataset files +*.csv +*.json +*.ndjson \ No newline at end of file diff --git a/Generated java handoff package.zip b/Generated java handoff package.zip new file mode 100644 index 0000000..333f71b Binary files /dev/null and b/Generated java handoff package.zip differ diff --git a/Library.ipynb b/Library.ipynb new file mode 100644 index 0000000..a6d2814 --- /dev/null +++ b/Library.ipynb @@ -0,0 +1,36 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d3799054-696c-48ee-b1db-d95976e59a30", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv(\"credits.csv, \"keywords.csv\", \"links_small.csv\", \"links.csv\", )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index bc97835..139597f 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,2 @@ -# Shhhh! CentralLibraryData -the place CentralLibrary gets its content from. -As a team, fork this repository to an Organization and submit the URL of your fork via the Student Portal. 
Each teammate will submit the **SAME URL.** - -There are things you _need to know_ and ask Instructors about. What are they? When can we schedule some dialog sessions so that you can get some ideas about what to study? - -Group Work requires that y'all think about plans, and priorities, and schedule some time to get to experts and senior devs to ask intelligent questions, and learn important topics. - -_What are they?_ - -## Project Overview - -Build a series of pipelines that feed the content needed by a comprehensive Library Management System built by the Java group. - -This is a Data project matched to the Java project [Central Library](https://github.com/ZCW-Summer25/CentralLibrary.git). - -The Java group builds that project, Data, you get to wrangle the data in the datasets below into a form that you agree with Java on. They load it, we get cool software. - -## Learning Objectives - -- pipelines - for real -- data formats (csv, json) -- wrangling data into useful formats - -## Need to Know - -- pandas -- python data structures - -### Content Datasets - -and this is NOT public: https://zcw-students-projects.s3.us-east-1.amazonaws.com/LMSDataForWeek3Project/LMS-DataStuff.zip - -It contains the collected data for this project. -You need to get it from an instructor, we're not gonna store it in this github repo, it's too Big. - -## The Pipeline Exercises - - -These are the conceptual sub-projects. -They will be useful in understanding how and what you need to do to provide Java with the data y'all need to make the project complete. 
- -https://github.com/ZCW-Summer25/PipelineOne -https://github.com/ZCW-Summer25/PipelineTwo -https://github.com/ZCW-Summer25/PipelineThree -https://github.com/ZCW-Summer25/PipelineFour diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..7cc7984 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "980e2938-bd3c-4095-b123-59fcf1c8a572", + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'data/raw/keywords.csv'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m pandas \u001b[38;5;28;01mas\u001b[39;00m pd\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m numpy \u001b[38;5;28;01mas\u001b[39;00m np\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m df = pd.read_csv(\u001b[33m'data/raw/keywords.csv'\u001b[39m)\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m#---SHAPE---\u001b[39;00m\n\u001b[32m 5\u001b[39m print(f'Rows: {df.shape[\u001b[32m0\u001b[39m]:,} Columns: {df.shape[\u001b[32m1\u001b[39m]}')\n\u001b[32m 6\u001b[39m print()\n", + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.6_2/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:873\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, skip_blank_lines, parse_dates, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, 
lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 861\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 862\u001b[39m dialect,\n\u001b[32m 863\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 869\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 870\u001b[39m )\n\u001b[32m 871\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.6_2/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:300\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 297\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 303\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.6_2/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1645\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1642\u001b[39m 
\u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1644\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1645\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.6_2/libexec/lib/python3.14/site-packages/pandas/io/parsers/readers.py:1904\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1902\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1903\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1904\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1905\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1906\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1907\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1908\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1909\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1910\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1911\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1912\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1913\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1914\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1915\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n", + "\u001b[36mFile \u001b[39m\u001b[32m/opt/homebrew/Cellar/jupyterlab/4.5.6_2/libexec/lib/python3.14/site-packages/pandas/io/common.py:926\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 921\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 922\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 923\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 924\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 925\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m926\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 927\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 928\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 929\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 930\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 931\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 932\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 933\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 934\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 935\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n", + "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'data/raw/keywords.csv'" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "df = pd.read_csv('data/raw/keywords.csv')\n", + "#---SHAPE---\n", + "print(f'Rows: {df.shape[0]:,} Columns: {df.shape[1]}')\n", + "print()\n", + "#---Column Names and Types---\n", + "print(df.dtypes)\n", + "print()\n", + "#---First 5 Rows (visual check)---\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36eacfc6-1a42-44f4-8669-01fecc069fde", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "387da07a-6cd8-407e-a53e-ccfdadf070c5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/clean_data.py b/clean_data.py new file mode 100644 index 0000000..f9617f4 --- /dev/null +++ b/clean_data.py @@ -0,0 +1,777 @@ +#!/usr/bin/env python3 +"""Clean CSV, JSON, and NDJSON files in a directory tree. + +By default, cleaned files are written to ./cleaned_data to avoid modifying +original source files. Use --in-place to overwrite files. +""" + +from __future__ import annotations + +import argparse +import csv +import io +import json +import re +from datetime import date, datetime +from pathlib import Path +from typing import Iterable + +try: + from validate_sample import validate as _validate_sample +except ImportError: # validate_sample not on path; validation silently skipped + _validate_sample = None # type: ignore[assignment] +from typing import Iterable, TextIO + +import pandas as pd + + +SUPPORTED_EXTENSIONS = {".csv", ".json", ".ndjson"} + +# Directories to skip entirely during scanning +SKIP_DIRS = {".venv", "venv", ".git", "__pycache__", "node_modules", ".tox", "cleaned_data"} +SKIP_DIRS = {".venv", "venv", ".git", "__pycache__", "node_modules", ".tox", "cleaned_data", "sample_data_java"} + +# Optional size guard for very large files. Disabled by default. 
+DEFAULT_MAX_FILE_MB: int | None = None + +NULL_TOKENS = {"", "null", "none", "na", "n/a", "nan"} +INT_COLUMN_HINTS = {"count", "pages", "year", "total", "quantity", "num", "number"} +FLOAT_COLUMN_HINTS = {"rating", "score", "amount", "price", "lat", "latitude", "lon", "longitude"} +BOOL_COLUMN_HINTS = {"is", "has", "active", "enabled", "flag"} +STRING_KEY_EXEMPT_FROM_TYPE_COERCION = {"id", "issn", "isbn", "zip", "postcode", "code"} +DATE_FIELD_HINTS = {"date", "day", "published", "created", "updated", "start", "end"} +NUMERIC_COLUMN_HINTS = INT_COLUMN_HINTS.union(FLOAT_COLUMN_HINTS).union({"pages", "timestamp", "index", "text"}) +MAX_DUPLICATE_INDEX_PRINT = 200 + + +def _normalize_whitespace(value: str) -> str: + return re.sub(r"\s+", " ", value).strip() + + +def _is_null_like(value: str) -> bool: + return value.strip().lower() in NULL_TOKENS + + +def _parse_iso_date(value: str) -> str | None: + token = value.strip() + if not token: + return None + + # Fast path for values already in YYYY-MM-DD. + if re.fullmatch(r"\d{4}-\d{2}-\d{2}", token): + return token + + # Try common date-only patterns first to avoid locale parsing ambiguity. + date_patterns = ( + "%Y/%m/%d", + "%d/%m/%Y", + "%m/%d/%Y", + "%d-%m-%Y", + "%m-%d-%Y", + "%d %b %Y", + "%d %B %Y", + "%b %d, %Y", + "%B %d, %Y", + ) + for pattern in date_patterns: + try: + return datetime.strptime(token, pattern).date().isoformat() + except ValueError: + continue + + # Handle ISO datetimes and similar values with time components. 
+ try: + normalized = token.replace("Z", "+00:00") + return datetime.fromisoformat(normalized).date().isoformat() + except ValueError: + return None + + +def standardize_dates(value: str) -> str | None: + """Normalize supported date formats to YYYY-MM-DD.""" + return _parse_iso_date(value) + + +def _field_name_tokens(field_name: str) -> set[str]: + lowered = field_name.strip().lower() + parts = set(filter(None, re.split(r"[^a-z0-9]+", lowered))) + if lowered.startswith("is_"): + parts.add("is") + if lowered.startswith("has_"): + parts.add("has") + return parts + + +def _is_date_field_name(field_name: str) -> bool: + tokens = _field_name_tokens(field_name) + return bool(tokens.intersection(DATE_FIELD_HINTS)) + + +def _should_skip_type_coercion(field_name: str) -> bool: + tokens = _field_name_tokens(field_name) + return bool(tokens.intersection(STRING_KEY_EXEMPT_FROM_TYPE_COERCION)) + + +def _coerce_typed_value(value: str, field_name: str | None = None) -> object: + normalized = _normalize_whitespace(value) + lowered = normalized.lower() + + if lowered in {"true", "false"}: + return lowered == "true" + + # Preserve IDs/codes and similarly structured string keys. 
+ if field_name and _should_skip_type_coercion(field_name): + return normalized + + if re.fullmatch(r"[+-]?\d+", normalized): + try: + return int(normalized) + except ValueError: + pass + + if re.fullmatch(r"[+-]?\d*\.\d+", normalized): + try: + return float(normalized) + except ValueError: + pass + + return normalized + + +def _normalize_value(value: object, field_name: str | None = None) -> object: + if isinstance(value, dict): + return {k: _normalize_value(v, k) for k, v in value.items()} + + if isinstance(value, list): + return [_normalize_value(item, field_name) for item in value] + + if isinstance(value, (datetime, date)): + return value.date().isoformat() if isinstance(value, datetime) else value.isoformat() + + if isinstance(value, str): + normalized = _normalize_whitespace(value) + if _is_null_like(normalized): + return None + + if field_name and _is_date_field_name(field_name): + parsed = standardize_dates(normalized) + if parsed is not None: + return parsed + + return _coerce_typed_value(normalized, field_name) + + return value + + +def iter_data_files(root: Path) -> Iterable[Path]: + for path in root.rglob("*"): + # Skip files inside excluded directories + if any(part in SKIP_DIRS or part.startswith(".") for part in path.parts[len(root.parts):-1]): + continue + if path.is_file() and path.suffix.lower() in SUPPORTED_EXTENSIONS: + yield path + + +def _emit(line: str, report_stream: TextIO | None = None) -> None: + print(line) + if report_stream is not None: + report_stream.write(line + "\n") + + +def _should_write_sidecar_report(path: Path) -> bool: + return not path.name.endswith("-schema.json") + + +def _report_path_for(path: Path, root: Path, report_dir: Path | None) -> Path: + if report_dir is not None: + report_path = report_dir.resolve() / path.relative_to(root) + return report_path.with_suffix(report_path.suffix + ".report.txt") + return path.with_suffix(path.suffix + ".report.txt") + + +def _print_non_csv_report(path: Path, target: Path, changed: 
bool, dupes: int, nulls: int, report_stream: TextIO | None = None) -> None: + _emit(f"\n=== REPORT: {path} ===", report_stream) + _emit(f"file type: {path.suffix.lower()}", report_stream) + _emit(f"status: {'changed' if changed else 'unchanged'}", report_stream) + _emit(f"output: {target}", report_stream) + _emit(f"duplicates removed: {dupes}", report_stream) + _emit(f"null rows removed: {nulls}", report_stream) + + +def _print_inventory(root: Path, report_stream: TextIO | None = None) -> None: + files = sorted(iter_data_files(root)) + _emit("\nData inventory", report_stream) + _emit("file,type,size_bytes", report_stream) + for path in files: + size = path.stat().st_size + _emit(f"{path},{path.suffix.lower()},{size}", report_stream) + + +def _column_tokens(name: str) -> set[str]: + return _field_name_tokens(name) + + +def _is_numeric_like_column(name: str) -> bool: + tokens = _column_tokens(name) + return bool(tokens.intersection(NUMERIC_COLUMN_HINTS)) + + +def _is_date_like_column(name: str) -> bool: + tokens = _column_tokens(name) + return bool(tokens.intersection(DATE_FIELD_HINTS)) + + +def _id_like_columns(df: pd.DataFrame) -> list[str]: + preferred = { + "id", + "title_id", + "userId", + "movieId", + "Text#", + "index", + "index number", + } + result: list[str] = [] + for col in df.columns: + lowered = str(col).strip().lower() + if col in preferred or lowered in preferred or lowered.endswith("_id"): + result.append(str(col)) + return result + + +def _print_csv_profile(path: Path, df: pd.DataFrame, report_stream: TextIO | None = None) -> None: + _emit(f"\n=== PROFILE: {path} ===", report_stream) + _emit(f"rows: {df.shape[0]}", report_stream) + _emit(f"columns: {df.shape[1]}", report_stream) + _emit("column names and dtypes:", report_stream) + _emit(df.dtypes.to_string(), report_stream) + + _emit("\nfirst 5 rows:", report_stream) + _emit(df.head(5).to_string(index=False), report_stream) + + null_count = df.isna().sum() + null_pct = (null_count / len(df) * 100) 
if len(df) else null_count.astype(float) + null_report = pd.DataFrame({"null_count": null_count, "null_pct": null_pct}).sort_values( + by=["null_count", "null_pct"], ascending=False + ) + _emit("\nnull report (sorted):", report_stream) + _emit(null_report.to_string(), report_stream) + + exact_dupes = int(df.duplicated().sum()) + _emit(f"\nexact duplicate rows: {exact_dupes}", report_stream) + if exact_dupes: + dup_index = df[df.duplicated()].index.tolist() + preview = dup_index[:MAX_DUPLICATE_INDEX_PRINT] + _emit( + f"duplicate index numbers (showing up to {MAX_DUPLICATE_INDEX_PRINT}): {preview}", + report_stream, + ) + _emit(df[df.duplicated()].head(MAX_DUPLICATE_INDEX_PRINT).to_string(index=True), report_stream) + + for col in _id_like_columns(df): + dup_mask = df[col].duplicated(keep=False) & df[col].notna() + dup_count = int(dup_mask.sum()) + if dup_count: + _emit(f"\nid duplicate rows for {col}: {dup_count}", report_stream) + dup_indices = df[dup_mask].index.tolist() + preview = dup_indices[:MAX_DUPLICATE_INDEX_PRINT] + _emit( + f"duplicate index numbers ({col}, showing up to {MAX_DUPLICATE_INDEX_PRINT}): {preview}", + report_stream, + ) + _emit( + df.loc[dup_mask, [col]].sort_values(by=col).head(MAX_DUPLICATE_INDEX_PRINT).to_string(index=True), + report_stream, + ) + + numeric_candidates = [c for c in df.columns if _is_numeric_like_column(str(c))] + if numeric_candidates: + _emit("\nnumeric column stats:", report_stream) + for col in numeric_candidates: + coerced = pd.to_numeric(df[col], errors="coerce") + non_numeric_mask = df[col].notna() & coerced.isna() + if non_numeric_mask.any(): + _emit(f"non-numeric values in {col}: {int(non_numeric_mask.sum())}", report_stream) + _emit(df.loc[non_numeric_mask, [col]].head(20).to_string(index=True), report_stream) + valid = coerced.dropna() + if len(valid): + _emit(f"{col}: min={valid.min()} max={valid.max()} mean={valid.mean()}", report_stream) + + date_candidates = [c for c in df.columns if 
_is_date_like_column(str(c))] + if date_candidates: + _emit("\ndate parsing checks:", report_stream) + for col in date_candidates: + parsed = pd.to_datetime(df[col], errors="coerce") + failed = int((df[col].notna() & parsed.isna()).sum()) + _emit(f"unparseable dates in {col}: {failed}", report_stream) + + +def _normalize_text_columns(df: pd.DataFrame) -> pd.DataFrame: + text_cols = [col for col in df.columns if pd.api.types.is_object_dtype(df[col])] + for col in text_cols: + df[col] = ( + df[col] + .astype("string") + .str.strip() + .str.replace(r"\s+", " ", regex=True) + ) + df[col] = df[col].replace("", pd.NA) + return df + + +def _clean_csv_dataframe(df: pd.DataFrame) -> pd.DataFrame: + cleaned = df.copy() + cleaned = _normalize_text_columns(cleaned) + cleaned = cleaned.drop_duplicates() + + required_cols = _id_like_columns(cleaned) + if required_cols: + for col in required_cols: + cleaned = cleaned[cleaned[col].notna()] + + for col in cleaned.columns: + if _is_numeric_like_column(str(col)): + cleaned[col] = pd.to_numeric(cleaned[col], errors="coerce") + + for col in cleaned.columns: + if _is_date_like_column(str(col)): + parsed = pd.to_datetime(cleaned[col], errors="coerce") + cleaned[col] = parsed.dt.strftime("%Y-%m-%d") + + for col in required_cols: + cleaned = cleaned[cleaned[col].notna()] + + cleaned = cleaned.drop_duplicates() + + return cleaned.reset_index(drop=True) + + +def _validation_unique_key_columns(df: pd.DataFrame, name: str) -> list[str]: + dataset_name = Path(name).name + overrides = { + "credits.csv": ["id"], + "keywords.csv": ["id"], + "links.csv": ["movieId"], + "links_small.csv": ["movieId"], + "periodical-issues.csv": [], + "periodical-titles.csv": [], + "pg_catalog.csv": [], + "ratings_small.csv": [], + "tcc_ceds_music.csv": [], + } + if dataset_name in overrides: + return [col for col in overrides[dataset_name] if col in df.columns] + + return [col for col in _id_like_columns(df) if str(col) == "id"] + + +def _validate_cleaned_csv(df: 
pd.DataFrame, name: str, report_stream: TextIO | None = None) -> None: + if int(df.duplicated().sum()) != 0: + raise AssertionError(f"{name}: exact duplicates remain after cleaning") + + for col in _validation_unique_key_columns(df, name): + if int(df[col].duplicated().sum()) != 0: + raise AssertionError(f"{name}: duplicate id values remain in {col}") + + if "rating" in df.columns: + rating = pd.to_numeric(df["rating"], errors="coerce").dropna() + if not rating.empty and ((rating < 0).any() or (rating > 5).any()): + raise AssertionError(f"{name}: rating outside expected range [0, 5]") + + if "pages" in df.columns: + pages = pd.to_numeric(df["pages"], errors="coerce").dropna() + if not pages.empty and (pages < 0).any(): + raise AssertionError(f"{name}: pages contains negative values") + + _emit(f"validation passed: {name}", report_stream) + + +def clean_csv_content(text: str, trim_fields: bool) -> tuple[str, int, int]: + """Returns (cleaned_text, duplicates_removed, null_rows_removed).""" + sample = text[:8192] + try: + dialect = csv.Sniffer().sniff(sample) + except csv.Error: + dialect = csv.excel + + rows = list(csv.reader(io.StringIO(text), dialect=dialect)) + + header: list[str] | None = None + cleaned_rows: list[list[str]] = [] + seen: set[tuple[str, ...]] = set() + duplicates_removed = 0 + null_rows_removed = 0 + + for i, row in enumerate(rows): + base_row = [cell.strip() for cell in row] if trim_fields else list(row) + if not base_row: + continue + # Keep header row as-is + if i == 0: + header = [_normalize_whitespace(cell) for cell in base_row] + cleaned_rows.append(header) + continue + + normalized: list[str] = [] + for idx, cell in enumerate(base_row): + col_name = header[idx] if header and idx < len(header) else "" + value = _normalize_whitespace(cell) + + if _is_null_like(value): + normalized.append("") + continue + + if _is_date_field_name(col_name): + iso_date = standardize_dates(value) + if iso_date is not None: + normalized.append(iso_date) + 
continue + + if _should_skip_type_coercion(col_name): + normalized.append(value) + continue + + tokens = _field_name_tokens(col_name) + if tokens.intersection(INT_COLUMN_HINTS) and re.fullmatch(r"[+-]?\d+", value): + normalized.append(str(int(value))) + continue + if tokens.intersection(FLOAT_COLUMN_HINTS) and re.fullmatch(r"[+-]?\d*\.\d+", value): + normalized.append(str(float(value))) + continue + if tokens.intersection(BOOL_COLUMN_HINTS) and value.lower() in {"true", "false"}: + normalized.append(value.lower()) + continue + + normalized.append(value) + + # Drop rows where every field is empty/null + if all(_is_null_like(cell) for cell in normalized): + null_rows_removed += 1 + continue + # Drop rows that are entirely blank + if all(cell.strip() == "" for cell in normalized): + null_rows_removed += 1 + continue + # Drop duplicate rows (compare data rows only, not header) + key = tuple(normalized) + if key in seen: + duplicates_removed += 1 + continue + seen.add(key) + cleaned_rows.append(normalized) + + out = io.StringIO() + writer = csv.writer( + out, + delimiter=getattr(dialect, "delimiter", ","), + quotechar=getattr(dialect, "quotechar", '"'), + quoting=csv.QUOTE_MINIMAL, + lineterminator="\n", + ) + writer.writerows(cleaned_rows) + return out.getvalue(), duplicates_removed, null_rows_removed + + +def _drop_nulls(obj: object) -> object: + """Recursively remove null values from dicts and lists.""" + if isinstance(obj, dict): + out: dict[str, object] = {} + for key, value in obj.items(): + if value is None: + continue + cleaned = _drop_nulls(value) + if cleaned is None: + continue + out[key] = cleaned + return out + if isinstance(obj, list): + out_list: list[object] = [] + for item in obj: + if item is None: + continue + cleaned_item = _drop_nulls(item) + if cleaned_item is None: + continue + out_list.append(cleaned_item) + return out_list + return obj + + +def clean_json_content(text: str) -> str: + payload = json.loads(text) + normalized = 
_normalize_value(payload) + cleaned = _drop_nulls(normalized) + return json.dumps(cleaned, indent=2, sort_keys=True, ensure_ascii=False) + "\n" + + +def clean_ndjson_content(text: str) -> tuple[str, int, int]: + """Returns (cleaned_text, duplicates_removed, null_rows_removed).""" + lines: list[str] = [] + seen: set[str] = set() + duplicates_removed = 0 + null_rows_removed = 0 + + for line in text.splitlines(): + if not line.strip(): + continue + obj = json.loads(line) + # Drop null-only objects + if obj is None or obj == {}: + null_rows_removed += 1 + continue + cleaned_obj = _drop_nulls(_normalize_value(obj)) + serialized = json.dumps(cleaned_obj, sort_keys=True, ensure_ascii=False, separators=(",", ":")) + if serialized in seen: + duplicates_removed += 1 + continue + seen.add(serialized) + lines.append(serialized) + return "\n".join(lines) + ("\n" if lines else ""), duplicates_removed, null_rows_removed + + +def clean_file(path: Path, trim_fields: bool) -> tuple[str, int, int]: + """Returns (cleaned_text, duplicates_removed, null_rows_removed).""" + original = path.read_text(encoding="utf-8-sig") + suffix = path.suffix.lower() + if suffix == ".csv": + return clean_csv_content(original, trim_fields=trim_fields) + if suffix == ".json": + # JSON is a single object; no row-level dedup/null concept + return clean_json_content(original), 0, 0 + if suffix == ".ndjson": + return clean_ndjson_content(original) + raise ValueError(f"Unsupported file type: {path}") + + +def write_output(cleaned: str, source: Path, root: Path, output_dir: Path | None, in_place: bool) -> Path: + if in_place: + source.write_text(cleaned, encoding="utf-8", newline="") + with source.open("w", encoding="utf-8", newline="") as f: + f.write(cleaned) + return source + + assert output_dir is not None + target = output_dir / source.relative_to(root) + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(cleaned, encoding="utf-8", newline="") + with target.open("w", encoding="utf-8", 
newline="") as f: + f.write(cleaned) + return target + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Clean CSV/JSON/NDJSON files by normalizing formatting and removing blank rows/lines." + ) + parser.add_argument("--root", type=Path, default=Path("."), help="Root directory to scan (default: current dir).") + parser.add_argument( + "--output-dir", + type=Path, + default=Path("cleaned_data"), + help="Destination root for cleaned files when not using --in-place (default: cleaned_data).", + ) + parser.add_argument("--in-place", action="store_true", help="Overwrite original files.") + parser.add_argument("--trim-fields", action="store_true", help="Trim leading/trailing spaces from CSV fields.") + parser.add_argument("--dry-run", action="store_true", help="Report files that would change without writing output.") + parser.add_argument( + "--validate-sample", + nargs=2, + metavar=("TITLES_CSV", "ISSUES_CSV"), + help="After cleaning, validate a golden sample pair against business rules.", + parser.add_argument("--inventory", action="store_true", help="Print file name, type, and size inventory.") + parser.add_argument("--profile-csv", action="store_true", help="Print CSV shape, dtypes, null, duplicate, and type checks.") + parser.add_argument("--validate-csv", action="store_true", help="Run post-clean CSV validation checks.") + parser.add_argument( + "--max-file-mb", + type=int, + default=DEFAULT_MAX_FILE_MB, + help="Skip files larger than this size in MB. 
Default: no size-based skipping.", + ) + parser.add_argument( + "--write-reports", + action="store_true", + help="Write inventory/profile/validation output to sidecar report files next to each source file.", + ) + parser.add_argument( + "--report-dir", + type=Path, + default=None, + help="Directory for report files when using --write-reports (default: next to each source file).", + ) + args = parser.parse_args() + + root = args.root.resolve() + output_dir = None if args.in_place else args.output_dir.resolve() + + inventory_stream: TextIO | None = None + if args.inventory and args.write_reports: + if args.report_dir is not None: + inventory_path = args.report_dir.resolve() / "inventory_report.txt" + else: + inventory_path = root / "inventory_report.txt" + inventory_path.parent.mkdir(parents=True, exist_ok=True) + inventory_stream = inventory_path.open("w", encoding="utf-8") + + try: + if args.inventory: + _print_inventory(root, report_stream=inventory_stream) + finally: + if inventory_stream is not None: + inventory_stream.close() + + changed = 0 + unchanged = 0 + failures = 0 + total_duplicates = 0 + total_nulls = 0 + + for path in sorted(iter_data_files(root)): + try: + cleaned, dupes, nulls = clean_file(path, trim_fields=args.trim_fields) + original = path.read_text(encoding="utf-8-sig") + if cleaned == original and dupes == 0 and nulls == 0: + unchanged += 1 + continue + changed += 1 + total_duplicates += dupes + total_nulls += nulls + if args.dry_run: + print(f"WOULD CLEAN: {path} (dupes={dupes}, nulls={nulls})") + continue + target = write_output(cleaned, path, root, output_dir, args.in_place) + print(f"CLEANED: {path} -> {target} (dupes={dupes}, nulls={nulls})") + except Exception as exc: # pragma: no cover - defensive path for malformed files + failures += 1 + print(f"FAILED: {path} ({exc})") + interrupted = False + per_file_summary: list[tuple[str, str, str, int | None, int | None]] = [] + max_file_bytes = None if args.max_file_mb is None else 
args.max_file_mb * 1024 * 1024 + files_to_process = sorted(iter_data_files(root)) + try: + for path in files_to_process: + report_path: Path | None = None + report_stream: TextIO | None = None + if max_file_bytes is not None and path.stat().st_size > max_file_bytes: + if args.write_reports and _should_write_sidecar_report(path): + report_path = _report_path_for(path, root, args.report_dir) + report_path.parent.mkdir(parents=True, exist_ok=True) + report_stream = report_path.open("w", encoding="utf-8") + _emit( + f"SKIPPED (too large, >{args.max_file_mb}MB): {path}", + report_stream, + ) + report_stream.close() + else: + print(f"SKIPPED (too large, >{args.max_file_mb}MB): {path}") + per_file_summary.append((str(path), "skipped", "", None, None)) + continue + try: + if args.write_reports and _should_write_sidecar_report(path): + report_path = _report_path_for(path, root, args.report_dir) + report_path.parent.mkdir(parents=True, exist_ok=True) + report_stream = report_path.open("w", encoding="utf-8") + + if path.suffix.lower() == ".csv" and (args.profile_csv or args.validate_csv): + df_before = pd.read_csv(path, low_memory=False, on_bad_lines='warn') + if args.profile_csv: + _print_csv_profile(path, df_before, report_stream=report_stream) + + cleaned, dupes, nulls = clean_file(path, trim_fields=args.trim_fields) + original = path.read_text(encoding="utf-8-sig") + if cleaned == original and dupes == 0 and nulls == 0: + unchanged += 1 + _emit(f"UNCHANGED: {path}", report_stream) + per_file_summary.append((str(path), "unchanged", str(path), 0, 0)) + continue + changed += 1 + total_duplicates += dupes + total_nulls += nulls + if args.dry_run: + _emit(f"WOULD CLEAN: {path} (dupes={dupes}, nulls={nulls})", report_stream) + if path.suffix.lower() == ".csv" and args.validate_csv: + validated_df = _clean_csv_dataframe(df_before) + _validate_cleaned_csv(validated_df, path.name, report_stream=report_stream) + per_file_summary.append((str(path), "would_clean", str(path), dupes, 
nulls)) + continue + target = write_output(cleaned, path, root, output_dir, args.in_place) + _emit(f"CLEANED: {path} -> {target} (dupes={dupes}, nulls={nulls})", report_stream) + if path.suffix.lower() == ".csv" and args.validate_csv: + validated_df = _clean_csv_dataframe(pd.read_csv(target, low_memory=False, on_bad_lines='warn')) + _validate_cleaned_csv(validated_df, target.name, report_stream=report_stream) + elif report_stream is not None: + _print_non_csv_report(path, target, changed=True, dupes=dupes, nulls=nulls, report_stream=report_stream) + per_file_summary.append((str(path), "cleaned", str(target), dupes, nulls)) + except Exception as exc: # pragma: no cover - defensive path for malformed files + failures += 1 + _emit(f"FAILED: {path} ({exc})", report_stream) + per_file_summary.append((str(path), "failed", "", None, None)) + finally: + if report_stream is not None: + report_stream.close() + except KeyboardInterrupt: + interrupted = True + print("\nInterrupted by user. Printing partial summary...") + + mode = "in-place" if args.in_place else f"output-dir={output_dir}" + print("\nSummary") + print(f"mode: {mode}") + print(f"changed: {changed}") + print(f"unchanged: {unchanged}") + print(f"duplicates removed: {total_duplicates}") + print(f"null rows removed: {total_nulls}") + print(f"failed: {failures}") + + sample_failures = 0 + if args.validate_sample: + titles_path, issues_path = (Path(p) for p in args.validate_sample) + if _validate_sample is None: + print("WARNING: validate_sample module not found; skipping business-rule validation.") + else: + violations = _validate_sample(titles_path, issues_path) + if violations: + sample_failures = len(violations) + print(f"\nSample validation FAILED — {sample_failures} violation(s):") + for v in violations: + print(f" {v}") + else: + print("\nSample validation OK — all business rules satisfied.") + + return 1 if (failures or sample_failures) else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) + 
print(f"interrupted: {interrupted}") + + if per_file_summary: + print("\nPer-file summary") + for file_path, status, output_path, dupes, nulls in per_file_summary: + output_display = output_path if output_path else "-" + if status == "cleaned": + print( + f"CLEANED: {file_path} -> {output_display} " + f"(duplicates_removed={dupes}, null_rows_removed={nulls})" + ) + elif status == "would_clean": + print( + f"WOULD_CLEAN: {file_path} -> {output_display} " + f"(duplicates_removed={dupes}, null_rows_removed={nulls})" + ) + elif status == "unchanged": + print( + f"UNCHANGED: {file_path} -> {output_display} " + "(duplicates_removed=0, null_rows_removed=0)" + ) + else: + print( + f"{status.upper()}: {file_path} -> {output_display} " + "(duplicates_removed=n/a, null_rows_removed=n/a)" + ) + + if interrupted: + return 130 + return 1 if failures else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/credits.csv.report.txt b/credits.csv.report.txt new file mode 100644 index 0000000..c150587 --- /dev/null +++ b/credits.csv.report.txt @@ -0,0 +1,156 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/credits.csv === +rows: 45476 +columns: 3 +column names and dtypes: +cast str +crew str +id int64 + +first 5 rows: + cast crew id + [{'cast_id': 14, 'character': 'Woody (voice)', 'credit_id': '52fe4284c3a36847f8024f95', 'gender': 2, 'id': 31, 'name': 'Tom Hanks', 'order': 0, 'profile_path': '/pQFoyx7rp09CJTAb932F2g8Nlho.jpg'}, {'cast_id': 15, 'character': 'Buzz Lightyear (voice)', 'credit_id': '52fe4284c3a36847f8024f99', 'gender': 2, 'id': 12898, 'name': 'Tim Allen', 'order': 1, 'profile_path': '/uX2xVf6pMmPepxnvFWyBtjexzgY.jpg'}, {'cast_id': 16, 'character': 'Mr. 
Potato Head (voice)', 'credit_id': '52fe4284c3a36847f8024f9d', 'gender': 2, 'id': 7167, 'name': 'Don Rickles', 'order': 2, 'profile_path': '/h5BcaDMPRVLHLDzbQavec4xfSdt.jpg'}, {'cast_id': 17, 'character': 'Slinky Dog (voice)', 'credit_id': '52fe4284c3a36847f8024fa1', 'gender': 2, 'id': 12899, 'name': 'Jim Varney', 'order': 3, 'profile_path': '/eIo2jVVXYgjDtaHoF19Ll9vtW7h.jpg'}, {'cast_id': 18, 'character': 'Rex (voice)', 'credit_id': '52fe4284c3a36847f8024fa5', 'gender': 2, 'id': 12900, 'name': 'Wallace Shawn', 'order': 4, 'profile_path': '/oGE6JqPP2xH4tNORKNqxbNPYi7u.jpg'}, {'cast_id': 19, 'character': 'Hamm (voice)', 'credit_id': '52fe4284c3a36847f8024fa9', 'gender': 2, 'id': 7907, 'name': 'John Ratzenberger', 'order': 5, 'profile_path': '/yGechiKWL6TJDfVE2KPSJYqdMsY.jpg'}, {'cast_id': 20, 'character': 'Bo Peep (voice)', 'credit_id': '52fe4284c3a36847f8024fad', 'gender': 1, 'id': 8873, 'name': 'Annie Potts', 'order': 6, 'profile_path': '/eryXT84RL41jHSJcMy4kS3u9y6w.jpg'}, {'cast_id': 26, 'character': 'Andy (voice)', 'credit_id': '52fe4284c3a36847f8024fc1', 'gender': 0, 'id': 1116442, 'name': 'John Morris', 'order': 7, 'profile_path': '/vYGyvK4LzeaUCoNSHtsuqJUY15M.jpg'}, {'cast_id': 22, 'character': 'Sid (voice)', 'credit_id': '52fe4284c3a36847f8024fb1', 'gender': 2, 'id': 12901, 'name': 'Erik von Detten', 'order': 8, 'profile_path': '/twnF1ZaJ1FUNUuo6xLXwcxjayBE.jpg'}, {'cast_id': 23, 'character': 'Mrs. Davis (voice)', 'credit_id': '52fe4284c3a36847f8024fb5', 'gender': 1, 'id': 12133, 'name': 'Laurie Metcalf', 'order': 9, 'profile_path': '/unMMIT60eoBM2sN2nyR7EZ2BvvD.jpg'}, {'cast_id': 24, 'character': 'Sergeant (voice)', 'credit_id': '52fe4284c3a36847f8024fb9', 'gender': 2, 'id': 8655, 'name': 'R. 
Lee Ermey', 'order': 10, 'profile_path': '/r8GBqFBjypLUP9VVqDqfZ7wYbSs.jpg'}, {'cast_id': 25, 'character': 'Hannah (voice)', 'credit_id': '52fe4284c3a36847f8024fbd', 'gender': 1, 'id': 12903, 'name': 'Sarah Freeman', 'order': 11, 'profile_path': None}, {'cast_id': 27, 'character': 'TV Announcer (voice)', 'credit_id': '52fe4284c3a36847f8024fc5', 'gender': 2, 'id': 37221, 'name': 'Penn Jillette', 'order': 12, 'profile_path': '/zmAaXUdx12NRsssgHbk1T31j2x9.jpg'}] [{'credit_id': '52fe4284c3a36847f8024f49', 'department': 'Directing', 'gender': 2, 'id': 7879, 'job': 'Director', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '52fe4284c3a36847f8024f4f', 'department': 'Writing', 'gender': 2, 'id': 12891, 'job': 'Screenplay', 'name': 'Joss Whedon', 'profile_path': '/dTiVsuaTVTeGmvkhcyJvKp2A5kr.jpg'}, {'credit_id': '52fe4284c3a36847f8024f55', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Screenplay', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '52fe4284c3a36847f8024f5b', 'department': 'Writing', 'gender': 2, 'id': 12892, 'job': 'Screenplay', 'name': 'Joel Cohen', 'profile_path': '/dAubAiZcvKFbboWlj7oXOkZnTSu.jpg'}, {'credit_id': '52fe4284c3a36847f8024f61', 'department': 'Writing', 'gender': 0, 'id': 12893, 'job': 'Screenplay', 'name': 'Alec Sokolow', 'profile_path': '/v79vlRYi94BZUQnkkyznbGUZLjT.jpg'}, {'credit_id': '52fe4284c3a36847f8024f67', 'department': 'Production', 'gender': 1, 'id': 12894, 'job': 'Producer', 'name': 'Bonnie Arnold', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f6d', 'department': 'Production', 'gender': 0, 'id': 12895, 'job': 'Executive Producer', 'name': 'Ed Catmull', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f73', 'department': 'Production', 'gender': 2, 'id': 12896, 'job': 'Producer', 'name': 'Ralph Guggenheim', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f79', 'department': 'Production', 'gender': 2, 
'id': 12897, 'job': 'Executive Producer', 'name': 'Steve Jobs', 'profile_path': '/mOMP3SwD5qWQSR0ldCIByd3guTV.jpg'}, {'credit_id': '52fe4284c3a36847f8024f8b', 'department': 'Editing', 'gender': 2, 'id': 8, 'job': 'Editor', 'name': 'Lee Unkrich', 'profile_path': '/bdTCCXjgOV3YyaNmLGYGOxFQMOc.jpg'}, {'credit_id': '52fe4284c3a36847f8024f91', 'department': 'Art', 'gender': 2, 'id': 7883, 'job': 'Art Direction', 'name': 'Ralph Eggleston', 'profile_path': '/uUfcGKDsKO1aROMpXRs67Hn6RvR.jpg'}, {'credit_id': '598331bf925141421201044b', 'department': 'Editing', 'gender': 2, 'id': 1168870, 'job': 'Editor', 'name': 'Robert Gordon', 'profile_path': None}, {'credit_id': '5892168cc3a36809660095f9', 'department': 'Sound', 'gender': 0, 'id': 1552883, 'job': 'Foley Editor', 'name': 'Mary Helen Leasman', 'profile_path': None}, {'credit_id': '5531824d9251415289000945', 'department': 'Visual Effects', 'gender': 0, 'id': 1453514, 'job': 'Animation', 'name': 'Kim Blanchette', 'profile_path': None}, {'credit_id': '589215969251412dcb009bf6', 'department': 'Sound', 'gender': 0, 'id': 1414182, 'job': 'ADR Editor', 'name': 'Marilyn McCoppen', 'profile_path': None}, {'credit_id': '589217099251412dc500a018', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Orchestrator', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5693e6b29251417b0e0000e3', 'department': 'Editing', 'gender': 0, 'id': 1429549, 'job': 'Color Timer', 'name': 'Dale E. 
Grahn', 'profile_path': None}, {'credit_id': '572e2522c3a36869e6001a9c', 'department': 'Visual Effects', 'gender': 0, 'id': 7949, 'job': 'CG Painter', 'name': 'Robin Cooper', 'profile_path': None}, {'credit_id': '574f12309251415ca1000012', 'department': 'Writing', 'gender': 2, 'id': 7879, 'job': 'Original Story', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '574f1240c3a3682e7300001c', 'department': 'Writing', 'gender': 2, 'id': 12890, 'job': 'Original Story', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '574f12519251415c92000015', 'department': 'Writing', 'gender': 0, 'id': 7911, 'job': 'Original Story', 'name': 'Joe Ranft', 'profile_path': '/f1BoWC2JbCcfP1e5hKfGsxkHzVU.jpg'}, {'credit_id': '574f12cec3a3682e82000022', 'department': 'Crew', 'gender': 0, 'id': 1629419, 'job': 'Post Production Supervisor', 'name': 'Patsy Bouge', 'profile_path': None}, {'credit_id': '574f14f19251415ca1000082', 'department': 'Art', 'gender': 0, 'id': 7961, 'job': 'Sculptor', 'name': 'Norm DeCarlo', 'profile_path': None}, {'credit_id': '5751ae4bc3a3683772002b7f', 'department': 'Visual Effects', 'gender': 2, 'id': 12905, 'job': 'Animation Director', 'name': 'Ash Brannon', 'profile_path': '/6ueWgPEEBHvS3De2BHYQnYjRTig.jpg'}, {'credit_id': '5891edbe9251412dc5007cd6', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Music', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '589213d39251412dc8009832', 'department': 'Directing', 'gender': 0, 'id': 1748707, 'job': 'Layout', 'name': 'Roman Figun', 'profile_path': None}, {'credit_id': '5892173dc3a3680968009351', 'department': 'Sound', 'gender': 2, 'id': 4949, 'job': 'Orchestrator', 'name': 'Don Davis', 'profile_path': None}, {'credit_id': '589217cec3a3686b0a0052ba', 'department': 'Sound', 'gender': 0, 'id': 1372885, 'job': 'Music Editor', 'name': 'James Flamberg', 'profile_path': None}, {'credit_id': 
'58921831c3a3686348004a64', 'department': 'Editing', 'gender': 0, 'id': 1739962, 'job': 'Negative Cutter', 'name': 'Mary Beth Smith', 'profile_path': None}, {'credit_id': '58921838c3a36809700096c0', 'department': 'Editing', 'gender': 0, 'id': 1748513, 'job': 'Negative Cutter', 'name': 'Rick Mackay', 'profile_path': None}, {'credit_id': '589218429251412dd1009d1b', 'department': 'Art', 'gender': 0, 'id': 1458006, 'job': 'Title Designer', 'name': 'Susan Bradley', 'profile_path': None}, {'credit_id': '5891ed99c3a3680966007670', 'department': 'Crew', 'gender': 0, 'id': 1748557, 'job': 'Supervising Technical Director', 'name': 'William Reeves', 'profile_path': None}, {'credit_id': '5891edcec3a3686b0a002eb2', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Songs', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5891edf9c3a36809700075e6', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Original Story', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58920f0b9251412dd7009104', 'department': 'Crew', 'gender': 2, 'id': 12890, 'job': 'Supervising Animator', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '58920f1fc3a3680977009021', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Designer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '58920f389251412dd700912d', 'department': 'Production', 'gender': 0, 'id': 12909, 'job': 'Production Supervisor', 'name': 'Karen Robert Jackson', 'profile_path': None}, {'credit_id': '58920fbd9251412dcb00969c', 'department': 'Crew', 'gender': 0, 'id': 953331, 'job': 'Executive Music Producer', 'name': 'Chris Montan', 'profile_path': None}, {'credit_id': '589210069251412dd7009219', 'department': 'Visual Effects', 'gender': 0, 'id': 7893, 'job': 'Animation Director', 'name': 'Rich Quade', 'profile_path': None}, {'credit_id': 
'589210329251412dcd00943b', 'department': 'Visual Effects', 'gender': 0, 'id': 8025, 'job': 'Animation', 'name': 'Michael Berenstein', 'profile_path': None}, {'credit_id': '5892103bc3a368096a009180', 'department': 'Visual Effects', 'gender': 0, 'id': 78009, 'job': 'Animation', 'name': 'Colin Brady', 'profile_path': None}, {'credit_id': '5892105dc3a3680968008db2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748682, 'job': 'Animation', 'name': 'Davey Crockett Feiten', 'profile_path': None}, {'credit_id': '589210669251412dcd009466', 'department': 'Visual Effects', 'gender': 0, 'id': 1454030, 'job': 'Animation', 'name': 'Angie Glocka', 'profile_path': None}, {'credit_id': '5892107c9251412dd1009613', 'department': 'Visual Effects', 'gender': 0, 'id': 1748683, 'job': 'Animation', 'name': 'Rex Grignon', 'profile_path': None}, {'credit_id': '5892108ac3a3680973008d3f', 'department': 'Visual Effects', 'gender': 0, 'id': 1748684, 'job': 'Animation', 'name': 'Tom K. Gurney', 'profile_path': None}, {'credit_id': '58921093c3a3686348004477', 'department': 'Visual Effects', 'gender': 2, 'id': 8029, 'job': 'Animation', 'name': 'Jimmy Hayward', 'profile_path': '/lTDRpudEY7BDwTefXbXzMlmb0ui.jpg'}, {'credit_id': '5892109b9251412dcd0094b0', 'department': 'Visual Effects', 'gender': 0, 'id': 1426773, 'job': 'Animation', 'name': 'Hal T. Hickel', 'profile_path': None}, {'credit_id': '589210a29251412dc5009a29', 'department': 'Visual Effects', 'gender': 0, 'id': 8035, 'job': 'Animation', 'name': 'Karen Kiser', 'profile_path': None}, {'credit_id': '589210ccc3a3680977009191', 'department': 'Visual Effects', 'gender': 0, 'id': 1748688, 'job': 'Animation', 'name': 'Anthony B. 
LaMolinara', 'profile_path': None}, {'credit_id': '589210d7c3a3686b0a004c1f', 'department': 'Visual Effects', 'gender': 0, 'id': 587314, 'job': 'Animation', 'name': 'Guionne Leroy', 'profile_path': None}, {'credit_id': '589210e1c3a36809770091a7', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Animation', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '589210ee9251412dc200978a', 'department': 'Visual Effects', 'gender': 0, 'id': 1748689, 'job': 'Animation', 'name': 'Les Major', 'profile_path': None}, {'credit_id': '589210fa9251412dc8009595', 'department': 'Visual Effects', 'gender': 2, 'id': 7892, 'job': 'Animation', 'name': 'Glenn McQueen', 'profile_path': None}, {'credit_id': '589211029251412dc8009598', 'department': 'Visual Effects', 'gender': 0, 'id': 555795, 'job': 'Animation', 'name': 'Mark Oftedal', 'profile_path': None}, {'credit_id': '5892110b9251412dc800959d', 'department': 'Visual Effects', 'gender': 2, 'id': 7882, 'job': 'Animation', 'name': 'Jeff Pidgeon', 'profile_path': '/yLddkg5HcgbJg00cS13GVBnP0HY.jpg'}, {'credit_id': '58921113c3a36863480044e4', 'department': 'Visual Effects', 'gender': 0, 'id': 8017, 'job': 'Animation', 'name': 'Jeff Pratt', 'profile_path': None}, {'credit_id': '5892111c9251412dcb0097e9', 'department': 'Visual Effects', 'gender': 0, 'id': 1184140, 'job': 'Animation', 'name': 'Steve Rabatich', 'profile_path': None}, {'credit_id': '58921123c3a36809700090f6', 'department': 'Visual Effects', 'gender': 0, 'id': 8049, 'job': 'Animation', 'name': 'Roger Rose', 'profile_path': None}, {'credit_id': '5892112b9251412dcb0097fb', 'department': 'Visual Effects', 'gender': 0, 'id': 1509559, 'job': 'Animation', 'name': 'Steve Segal', 'profile_path': None}, {'credit_id': '589211349251412dc80095c3', 'department': 'Visual Effects', 'gender': 0, 'id': 1748691, 'job': 'Animation', 'name': 'Doug Sheppeck', 'profile_path': None}, {'credit_id': '5892113cc3a3680970009106', 'department': 'Visual 
Effects', 'gender': 0, 'id': 8050, 'job': 'Animation', 'name': 'Alan Sperling', 'profile_path': None}, {'credit_id': '58921148c3a3686b0a004c99', 'department': 'Visual Effects', 'gender': 0, 'id': 8010, 'job': 'Animation', 'name': 'Doug Sweetland', 'profile_path': None}, {'credit_id': '58921150c3a3680966009125', 'department': 'Visual Effects', 'gender': 0, 'id': 8044, 'job': 'Animation', 'name': 'David Tart', 'profile_path': None}, {'credit_id': '589211629251412dc5009b00', 'department': 'Visual Effects', 'gender': 0, 'id': 1454034, 'job': 'Animation', 'name': 'Ken Willard', 'profile_path': None}, {'credit_id': '589211c1c3a3686b0a004d28', 'department': 'Visual Effects', 'gender': 0, 'id': 7887, 'job': 'Visual Effects Supervisor', 'name': 'Thomas Porter', 'profile_path': None}, {'credit_id': '589211d4c3a3680968008ed9', 'department': 'Visual Effects', 'gender': 0, 'id': 1406878, 'job': 'Visual Effects', 'name': 'Mark Thomas Henne', 'profile_path': None}, {'credit_id': '589211f59251412dd4008e65', 'department': 'Visual Effects', 'gender': 0, 'id': 1748698, 'job': 'Visual Effects', 'name': 'Oren Jacob', 'profile_path': None}, {'credit_id': '58921242c3a368096a00939b', 'department': 'Visual Effects', 'gender': 0, 'id': 1748699, 'job': 'Visual Effects', 'name': 'Darwyn Peachey', 'profile_path': None}, {'credit_id': '5892124b9251412dc5009bd2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748701, 'job': 'Visual Effects', 'name': 'Mitch Prater', 'profile_path': None}, {'credit_id': '58921264c3a3686b0a004dbf', 'department': 'Visual Effects', 'gender': 0, 'id': 1748703, 'job': 'Visual Effects', 'name': 'Brian M. 
Rosen', 'profile_path': None}, {'credit_id': '589212709251412dcd009676', 'department': 'Lighting', 'gender': 1, 'id': 12912, 'job': 'Lighting Supervisor', 'name': 'Sharon Calahan', 'profile_path': None}, {'credit_id': '5892127fc3a3686b0a004de5', 'department': 'Lighting', 'gender': 0, 'id': 7899, 'job': 'Lighting Supervisor', 'name': 'Galyn Susman', 'profile_path': None}, {'credit_id': '589212cdc3a3680970009268', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'CG Painter', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892130f9251412dc8009791', 'department': 'Art', 'gender': 0, 'id': 1748705, 'job': 'Sculptor', 'name': 'Shelley Daniels Lekven', 'profile_path': None}, {'credit_id': '5892131c9251412dd4008f4c', 'department': 'Visual Effects', 'gender': 2, 'id': 7889, 'job': 'Character Designer', 'name': 'Bob Pauley', 'profile_path': None}, {'credit_id': '589213249251412dd100987b', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Character Designer', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '5892132b9251412dc80097b1', 'department': 'Visual Effects', 'gender': 2, 'id': 7, 'job': 'Character Designer', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58921332c3a368634800467b', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'Character Designer', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892135f9251412dd4008f90', 'department': 'Visual Effects', 'gender': 0, 'id': 1748706, 'job': 'Character Designer', 'name': 'Steve Johnson', 'profile_path': None}, {'credit_id': '58921384c3a3680973008fd4', 'department': 'Visual Effects', 'gender': 0, 'id': 1176752, 'job': 'Character Designer', 'name': 'Dan Haskett', 'profile_path': None}, {'credit_id': '5892138e9251412dc20099fc', 'department': 'Visual Effects', 'gender': 0, 'id': 1088034, 'job': 'Character Designer', 'name': 'Tom Holloway', 'profile_path': 
'/a0r0T2usTBpgMI5aZbRBDW1fTl8.jpg'}, {'credit_id': '58921395c3a368097700942f', 'department': 'Visual Effects', 'gender': 0, 'id': 1447465, 'job': 'Character Designer', 'name': 'Jean Gillmore', 'profile_path': None}, {'credit_id': '589213e2c3a3680973009026', 'department': 'Directing', 'gender': 0, 'id': 1748709, 'job': 'Layout', 'name': 'Desirée Mourad', 'profile_path': None}, {'credit_id': '589214099251412dc5009d57', 'department': 'Art', 'gender': 0, 'id': 1748710, 'job': 'Set Dresser', 'name': "Kelly O'Connell", 'profile_path': None}, {'credit_id': '58921411c3a3686b0a004f70', 'department': 'Art', 'gender': 0, 'id': 1443471, 'job': 'Set Dresser', 'name': 'Sonoko Konishi', 'profile_path': None}, {'credit_id': '58921434c3a368096a00956e', 'department': 'Art', 'gender': 0, 'id': 1748711, 'job': 'Set Dresser', 'name': 'Ann M. Rockwell', 'profile_path': None}, {'credit_id': '5892144ac3a36809680090de', 'department': 'Editing', 'gender': 0, 'id': 1748712, 'job': 'Editorial Manager', 'name': 'Julie M. 
McDonald', 'profile_path': None}, {'credit_id': '58921479c3a368096800910f', 'department': 'Editing', 'gender': 0, 'id': 1589729, 'job': 'Assistant Editor', 'name': 'Robin Lee', 'profile_path': None}, {'credit_id': '5892148b9251412dd10099cc', 'department': 'Editing', 'gender': 0, 'id': 1748716, 'job': 'Assistant Editor', 'name': 'Tom Freeman', 'profile_path': None}, {'credit_id': '589214959251412dcb009b1f', 'department': 'Editing', 'gender': 0, 'id': 1748717, 'job': 'Assistant Editor', 'name': 'Ada Cochavi', 'profile_path': None}, {'credit_id': '5892149ec3a3686348004798', 'department': 'Editing', 'gender': 0, 'id': 1336438, 'job': 'Assistant Editor', 'name': 'Dana Mulligan', 'profile_path': None}, {'credit_id': '589214adc3a368096a0095db', 'department': 'Editing', 'gender': 0, 'id': 1748718, 'job': 'Editorial Coordinator', 'name': 'Deirdre Morrison', 'profile_path': None}, {'credit_id': '589214c7c3a368097700952b', 'department': 'Production', 'gender': 0, 'id': 1748719, 'job': 'Production Coordinator', 'name': 'Lori Lombardo', 'profile_path': None}, {'credit_id': '589214cec3a368096a009603', 'department': 'Production', 'gender': 0, 'id': 1748720, 'job': 'Production Coordinator', 'name': 'Ellen Devine', 'profile_path': None}, {'credit_id': '589214e39251412dc8009904', 'department': 'Crew', 'gender': 0, 'id': 1468014, 'job': 'Unit Publicist', 'name': 'Lauren Beth Strogoff', 'profile_path': None}, {'credit_id': '58921544c3a3686b0a00507d', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '5892154c9251412dd1009a56', 'department': 'Sound', 'gender': 0, 'id': 1425978, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Summers', 'profile_path': None}, {'credit_id': '58921555c3a36809680091bd', 'department': 'Sound', 'gender': 2, 'id': 8276, 'job': 'Supervising Sound Editor', 'name': 'Tim Holland', 'profile_path': None}, {'credit_id': 
'589215c39251412dcb009c12', 'department': 'Sound', 'gender': 0, 'id': 7069, 'job': 'Sound Effects Editor', 'name': 'Pat Jackson', 'profile_path': None}, {'credit_id': '58921698c3a368096a009788', 'department': 'Crew', 'gender': 2, 'id': 15894, 'job': 'Sound Design Assistant', 'name': 'Tom Myers', 'profile_path': None}, {'credit_id': '589216a89251412dc2009ca4', 'department': 'Sound', 'gender': 0, 'id': 1414177, 'job': 'Assistant Sound Editor', 'name': 'J.R. Grubbs', 'profile_path': None}, {'credit_id': '589216c19251412dc2009cb9', 'department': 'Sound', 'gender': 1, 'id': 1748724, 'job': 'Assistant Sound Editor', 'name': 'Susan Sanford', 'profile_path': None}, {'credit_id': '589216ccc3a3680973009274', 'department': 'Sound', 'gender': 0, 'id': 1748725, 'job': 'Assistant Sound Editor', 'name': 'Susan Popovic', 'profile_path': None}, {'credit_id': '589216d79251412dc8009aa0', 'department': 'Sound', 'gender': 0, 'id': 8067, 'job': 'Assistant Sound Editor', 'name': 'Dan Engstrom', 'profile_path': None}, {'credit_id': '589216e49251412dcd009a4f', 'department': 'Production', 'gender': 1, 'id': 7902, 'job': 'Casting Consultant', 'name': 'Ruth Lambert', 'profile_path': None}, {'credit_id': '589216f39251412dc2009cf3', 'department': 'Production', 'gender': 0, 'id': 84493, 'job': 'ADR Voice Casting', 'name': 'Mickie McGowan', 'profile_path': '/k7TjJBfINsg8vLQxJwos6XObAD6.jpg'}] 862 +[{'cast_id': 1, 'character': 'Alan Parrish', 'credit_id': '52fe44bfc3a36847f80a7c73', 'gender': 2, 'id': 2157, 'name': 'Robin Williams', 'order': 0, 'profile_path': '/sojtJyIV3lkUeThD7A2oHNm8183.jpg'}, {'cast_id': 8, 'character': 'Samuel Alan Parrish / Van Pelt', 'credit_id': '52fe44bfc3a36847f80a7c99', 'gender': 2, 'id': 8537, 'name': 'Jonathan Hyde', 'order': 1, 'profile_path': '/7il5D76vx6QVRVlpVvBPEC40MBi.jpg'}, {'cast_id': 2, 'character': 'Judy Sheperd', 'credit_id': '52fe44bfc3a36847f80a7c77', 'gender': 1, 'id': 205, 'name': 'Kirsten Dunst', 'order': 2, 'profile_path': 
'/wBXvh6PJd0IUVNpvatPC1kzuHtm.jpg'}, {'cast_id': 24, 'character': 'Peter Shepherd', 'credit_id': '52fe44c0c3a36847f80a7ce7', 'gender': 0, 'id': 145151, 'name': 'Bradley Pierce', 'order': 3, 'profile_path': '/j6iW0vVA23GQniAPSYI6mi4hiEW.jpg'}, {'cast_id': 10, 'character': 'Sarah Whittle', 'credit_id': '52fe44bfc3a36847f80a7c9d', 'gender': 1, 'id': 5149, 'name': 'Bonnie Hunt', 'order': 4, 'profile_path': '/7spiVQwmr8siw5QCcvvdRG3c7Lf.jpg'}, {'cast_id': 25, 'character': 'Nora Shepherd', 'credit_id': '52fe44c0c3a36847f80a7ceb', 'gender': 1, 'id': 10739, 'name': 'Bebe Neuwirth', 'order': 5, 'profile_path': '/xm58rpMRVDHS0IGttw1pTlqGwkN.jpg'}, {'cast_id': 26, 'character': 'Carl Bentley', 'credit_id': '52fe44c0c3a36847f80a7cef', 'gender': 2, 'id': 58563, 'name': 'David Alan Grier', 'order': 6, 'profile_path': '/5tkt3qRZTco4sz604aTIarQ0m8W.jpg'}, {'cast_id': 11, 'character': 'Carol Anne Parrish', 'credit_id': '52fe44bfc3a36847f80a7ca1', 'gender': 1, 'id': 1276, 'name': 'Patricia Clarkson', 'order': 7, 'profile_path': '/10ZSyaUqzUlKTd60HmeiGhlytZG.jpg'}, {'cast_id': 14, 'character': 'Alan Parrish (young)', 'credit_id': '52fe44bfc3a36847f80a7cad', 'gender': 0, 'id': 46530, 'name': 'Adam Hann-Byrd', 'order': 8, 'profile_path': '/hEoqDqtMO91hYWD5iDrDesnLDlt.jpg'}, {'cast_id': 13, 'character': 'Sarah Whittle (young)', 'credit_id': '52fe44bfc3a36847f80a7ca9', 'gender': 1, 'id': 56523, 'name': 'Laura Bell Bundy', 'order': 9, 'profile_path': '/8tAVDBRoZPjKfCbBDyh4iK9JNEp.jpg'}, {'cast_id': 31, 'character': 'Exterminator', 'credit_id': '52fe44c0c3a36847f80a7cff', 'gender': 2, 'id': 51551, 'name': 'James Handy', 'order': 10, 'profile_path': '/vm0WQmuP8jEGgFTd3VCcJe7zpUi.jpg'}, {'cast_id': 12, 'character': 'Mrs. 
Thomas the Realtor', 'credit_id': '52fe44bfc3a36847f80a7ca5', 'gender': 1, 'id': 56522, 'name': 'Gillian Barber', 'order': 11, 'profile_path': '/qoqPX15J5jh6Sy0A9JvvRJIuw64.jpg'}, {'cast_id': 28, 'character': 'Benjamin', 'credit_id': '52fe44c0c3a36847f80a7cf3', 'gender': 2, 'id': 1000304, 'name': 'Brandon Obray', 'order': 12, 'profile_path': None}, {'cast_id': 29, 'character': 'Caleb', 'credit_id': '52fe44c0c3a36847f80a7cf7', 'gender': 0, 'id': 188949, 'name': 'Cyrus Thiedeke', 'order': 13, 'profile_path': None}, {'cast_id': 30, 'character': 'Billy Jessup', 'credit_id': '52fe44c0c3a36847f80a7cfb', 'gender': 0, 'id': 1076551, 'name': 'Gary Joseph Thorup', 'order': 14, 'profile_path': None}, {'cast_id': 32, 'character': 'Cop', 'credit_id': '5588053fc3a36838530063f5', 'gender': 0, 'id': 1480246, 'name': 'Leonard Zola', 'order': 15, 'profile_path': None}, {'cast_id': 33, 'character': 'Bum', 'credit_id': '55935687925141645a002097', 'gender': 2, 'id': 25024, 'name': 'Lloyd Berry', 'order': 16, 'profile_path': '/s7SVCOtvcuQ9wRQPZfUdahb5x88.jpg'}, {'cast_id': 34, 'character': 'Jim Shepherd', 'credit_id': '559356d09251415df8002cb7', 'gender': 2, 'id': 27110, 'name': 'Malcolm Stewart', 'order': 17, 'profile_path': '/l2vgzkLR7GRr8ugjZCILA0OiliI.jpg'}, {'cast_id': 35, 'character': 'Martha Shepherd', 'credit_id': '55935730925141645a0020ad', 'gender': 0, 'id': 53715, 'name': 'Annabel Kershaw', 'order': 18, 'profile_path': '/1VqbvAohBwFhETZtDe76JXQcxKm.jpg'}, {'cast_id': 36, 'character': 'Gun Salesman', 'credit_id': '5593576992514167fd000610', 'gender': 2, 'id': 1379424, 'name': 'Darryl Henriques', 'order': 19, 'profile_path': '/7QMHooY9ewNQlE24WKAOdwW0evU.jpg'}, {'cast_id': 37, 'character': 'Paramedic', 'credit_id': '559357ae92514152de002f42', 'gender': 0, 'id': 1235504, 'name': 'Robyn Driscoll', 'order': 20, 'profile_path': None}, {'cast_id': 50, 'character': 'Paramedic', 'credit_id': '5657803b925141018f00a5dc', 'gender': 2, 'id': 25389, 'name': 'Peter Bryant', 'order': 21, 
'profile_path': '/fkcx9Tnp25UC5HlI2eW3nGvumsZ.jpg'}, {'cast_id': 39, 'character': 'Girl', 'credit_id': '559358e292514152de002f63', 'gender': 0, 'id': 1483449, 'name': 'Sarah Gilson', 'order': 22, 'profile_path': None}, {'cast_id': 40, 'character': 'Girl', 'credit_id': '5593590d92514152db002df3', 'gender': 0, 'id': 1483450, 'name': 'Florica Vlad', 'order': 23, 'profile_path': None}, {'cast_id': 41, 'character': 'Baker', 'credit_id': '55935946c3a36869d1001b4d', 'gender': 0, 'id': 1483451, 'name': 'June Lion', 'order': 24, 'profile_path': None}, {'cast_id': 42, 'character': 'Pianist', 'credit_id': '5593597692514167fd000644', 'gender': 0, 'id': 1483452, 'name': 'Brenda Lockmuller', 'order': 25, 'profile_path': None}] [{'credit_id': '52fe44bfc3a36847f80a7cd1', 'department': 'Production', 'gender': 2, 'id': 511, 'job': 'Executive Producer', 'name': 'Larry J. Franco', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7c89', 'department': 'Writing', 'gender': 2, 'id': 876, 'job': 'Screenplay', 'name': 'Jonathan Hensleigh', 'profile_path': '/l1c4UFD3g0HVWj5f0CxXAvMAGiT.jpg'}, {'credit_id': '52fe44bfc3a36847f80a7cdd', 'department': 'Sound', 'gender': 2, 'id': 1729, 'job': 'Original Music Composer', 'name': 'James Horner', 'profile_path': '/oLOtXxXsYk8X4qq0ud4xVypXudi.jpg'}, {'credit_id': '52fe44bfc3a36847f80a7c7d', 'department': 'Directing', 'gender': 2, 'id': 4945, 'job': 'Director', 'name': 'Joe Johnston', 'profile_path': '/fok4jaO62v5IP6hkpaaAcXuw2H.jpg'}, {'credit_id': '52fe44bfc3a36847f80a7cd7', 'department': 'Editing', 'gender': 2, 'id': 4951, 'job': 'Editor', 'name': 'Robert Dalva', 'profile_path': None}, {'credit_id': '573523bec3a368025100062c', 'department': 'Production', 'gender': 0, 'id': 4952, 'job': 'Casting', 'name': 'Nancy Foy', 'profile_path': '/blCkmS4dqNsbPGuQfozHE6wgWBw.jpg'}, {'credit_id': '5722a924c3a3682d1e000b41', 'department': 'Visual Effects', 'gender': 0, 'id': 8023, 'job': 'Animation Supervisor', 'name': 'Kyle Balda', 'profile_path': 
'/jR8iAP6uC0V42KbUG87qBIUO3Hj.jpg'}, {'credit_id': '52fe44c0c3a36847f80a7ce3', 'department': 'Art', 'gender': 2, 'id': 9967, 'job': 'Production Design', 'name': 'James D. Bissell', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7cb9', 'department': 'Production', 'gender': 2, 'id': 9184, 'job': 'Producer', 'name': 'Scott Kroopf', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7ccb', 'department': 'Production', 'gender': 2, 'id': 9196, 'job': 'Executive Producer', 'name': 'Ted Field', 'profile_path': '/qmB7sZcgRUq7mRFBSTlSsVXh7sH.jpg'}, {'credit_id': '52fe44bfc3a36847f80a7cc5', 'department': 'Production', 'gender': 2, 'id': 18389, 'job': 'Executive Producer', 'name': 'Robert W. Cort', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7cbf', 'department': 'Camera', 'gender': 2, 'id': 11371, 'job': 'Director of Photography', 'name': 'Thomas E. Ackerman', 'profile_path': '/xFDbxk53icM1ofL4iCIwB4GkUxN.jpg'}, {'credit_id': '52fe44bfc3a36847f80a7c83', 'department': 'Writing', 'gender': 2, 'id': 42356, 'job': 'Novel', 'name': 'Chris van Allsburg', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7cb3', 'department': 'Production', 'gender': 2, 'id': 42357, 'job': 'Producer', 'name': 'William Teitler', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7c8f', 'department': 'Writing', 'gender': 2, 'id': 56520, 'job': 'Screenplay', 'name': 'Greg Taylor', 'profile_path': None}, {'credit_id': '52fe44bfc3a36847f80a7c95', 'department': 'Writing', 'gender': 2, 'id': 56521, 'job': 'Screenplay', 'name': 'Jim Strain', 'profile_path': None}] 8844 + [{'cast_id': 2, 'character': 'Max Goldman', 'credit_id': '52fe466a9251416c75077a8d', 'gender': 2, 'id': 6837, 'name': 'Walter Matthau', 'order': 0, 'profile_path': '/xJVkvprOnzP5Zdh5y63y8HHniDZ.jpg'}, {'cast_id': 3, 'character': 'John Gustafson', 'credit_id': '52fe466a9251416c75077a91', 'gender': 2, 'id': 3151, 'name': 'Jack Lemmon', 'order': 1, 'profile_path': '/chZmNRYMtqkiDlatprGDH4BzGqG.jpg'}, 
{'cast_id': 4, 'character': 'Ariel Gustafson', 'credit_id': '52fe466a9251416c75077a95', 'gender': 1, 'id': 13567, 'name': 'Ann-Margret', 'order': 2, 'profile_path': '/jx5lTaJ5VXZHYB52gaOTAZ9STZk.jpg'}, {'cast_id': 5, 'character': 'Maria Sophia Coletta Ragetti', 'credit_id': '52fe466a9251416c75077a99', 'gender': 1, 'id': 16757, 'name': 'Sophia Loren', 'order': 3, 'profile_path': '/emKLhbji1c7BjcA2DdbWf0EP9zH.jpg'}, {'cast_id': 6, 'character': 'Melanie Gustafson', 'credit_id': '52fe466a9251416c75077a9d', 'gender': 1, 'id': 589, 'name': 'Daryl Hannah', 'order': 4, 'profile_path': '/4LLmp6AQdlj6ueGCRbVRSGvvFSt.jpg'}, {'cast_id': 9, 'character': 'Grandpa Gustafson', 'credit_id': '53e5fcc2c3a3684430000d65', 'gender': 2, 'id': 16523, 'name': 'Burgess Meredith', 'order': 5, 'profile_path': '/lm98oKloU33Q7QDIIMSyc4Pr2jA.jpg'}, {'cast_id': 10, 'character': 'Jacob Goldman', 'credit_id': '53e5fcd4c3a3684433000e1a', 'gender': 2, 'id': 7166, 'name': 'Kevin Pollak', 'order': 6, 'profile_path': '/kwu2T8CDnThZTzE88uiSgJ5eHXf.jpg'}] [{'credit_id': '52fe466a9251416c75077a89', 'department': 'Directing', 'gender': 2, 'id': 26502, 'job': 'Director', 'name': 'Howard Deutch', 'profile_path': '/68Vae1HkU1NxQZ6KEmuxIpno7c9.jpg'}, {'credit_id': '52fe466b9251416c75077aa3', 'department': 'Writing', 'gender': 2, 'id': 16837, 'job': 'Characters', 'name': 'Mark Steven Johnson', 'profile_path': '/6trChNn3o2bi4i2ipgMEAytwmZp.jpg'}, {'credit_id': '52fe466b9251416c75077aa9', 'department': 'Writing', 'gender': 2, 'id': 16837, 'job': 'Writer', 'name': 'Mark Steven Johnson', 'profile_path': '/6trChNn3o2bi4i2ipgMEAytwmZp.jpg'}, {'credit_id': '5675eb4b92514179dd003933', 'department': 'Crew', 'gender': 2, 'id': 1551320, 'job': 'Sound Recordist', 'name': 'Jack Keller', 'profile_path': None}] 15602 + [{'cast_id': 1, 'character': "Savannah 'Vannah' Jackson", 'credit_id': '52fe44779251416c91011aad', 'gender': 1, 'id': 8851, 'name': 'Whitney Houston', 'order': 0, 'profile_path': 
'/69ouDnXnmklYPr4sMJXWKYz81AL.jpg'}, {'cast_id': 2, 'character': "Bernadine 'Bernie' Harris", 'credit_id': '52fe44779251416c91011ab1', 'gender': 1, 'id': 9780, 'name': 'Angela Bassett', 'order': 1, 'profile_path': '/tHkgSzhEuJKp5hqp0DZLad8HNZ9.jpg'}, {'cast_id': 3, 'character': "Gloria 'Glo' Matthews", 'credit_id': '52fe44779251416c91011ab5', 'gender': 1, 'id': 18284, 'name': 'Loretta Devine', 'order': 2, 'profile_path': '/zLQFwQTFtHkb8sbFdkPNamFI7jv.jpg'}, {'cast_id': 4, 'character': 'Robin Stokes', 'credit_id': '52fe44779251416c91011ab9', 'gender': 1, 'id': 51359, 'name': 'Lela Rochon', 'order': 3, 'profile_path': '/9DBu3r5O4fBosSS4FnSzFCVpm0O.jpg'}, {'cast_id': 5, 'character': 'Marvin King', 'credit_id': '52fe44779251416c91011abd', 'gender': 2, 'id': 66804, 'name': 'Gregory Hines', 'order': 4, 'profile_path': '/rvvQWFQGeGR14WFVNe0Qg1J7uVY.jpg'}, {'cast_id': 6, 'character': 'Kenneth Dawkins', 'credit_id': '52fe44779251416c91011ac1', 'gender': 2, 'id': 352, 'name': 'Dennis Haysbert', 'order': 5, 'profile_path': '/mn5Nc5Q31GslpVVWs8p41W4TBma.jpg'}, {'cast_id': 8, 'character': 'John Harris, Sr.', 'credit_id': '52fe44779251416c91011ac5', 'gender': 2, 'id': 87118, 'name': 'Michael Beach', 'order': 6, 'profile_path': '/lOlWhURNKyZHn71dvq7qC7fiwN6.jpg'}, {'cast_id': 10, 'character': 'Troy', 'credit_id': '52fe44779251416c91011acf', 'gender': 2, 'id': 34, 'name': 'Mykelti Williamson', 'order': 7, 'profile_path': '/8TTxzpuvvpw2tB8xZBCDslYupNU.jpg'}, {'cast_id': 20, 'character': 'Joseph', 'credit_id': '56d1b15fc3a3681e4a008b6b', 'gender': 2, 'id': 1276777, 'name': 'Lamont Johnson', 'order': 8, 'profile_path': '/wfZzRPJBdsx62GbkUimc1PShxyC.jpg'}, {'cast_id': 21, 'character': 'James Wheeler', 'credit_id': '56f8a929c3a36816e80084f3', 'gender': 2, 'id': 10814, 'name': 'Wesley Snipes', 'order': 9, 'profile_path': '/hQ6EBa6vgu7HoZpzms8Y10VL5Iw.jpg'}] [{'credit_id': '52fe44779251416c91011acb', 'department': 'Directing', 'gender': 2, 'id': 2178, 'job': 'Director', 'name': 'Forest 
Whitaker', 'profile_path': '/4pMQkelS5lK661m9Kz3oIxLYiyS.jpg'}, {'credit_id': '52fe44779251416c91011ae1', 'department': 'Writing', 'gender': 0, 'id': 5144, 'job': 'Screenplay', 'name': 'Ronald Bass', 'profile_path': None}, {'credit_id': '52fe44779251416c91011ae7', 'department': 'Production', 'gender': 0, 'id': 5144, 'job': 'Producer', 'name': 'Ronald Bass', 'profile_path': None}, {'credit_id': '52fe44779251416c91011aff', 'department': 'Production', 'gender': 2, 'id': 21968, 'job': 'Producer', 'name': 'Ezra Swerdlow', 'profile_path': None}, {'credit_id': '52fe44779251416c91011af9', 'department': 'Production', 'gender': 1, 'id': 70592, 'job': 'Producer', 'name': 'Deborah Schindler', 'profile_path': '/2vFzdHxcB8cEtvPlNSs2VGZ7WG3.jpg'}, {'credit_id': '52fe44779251416c91011adb', 'department': 'Writing', 'gender': 0, 'id': 111118, 'job': 'Screenplay', 'name': 'Terry McMillan', 'profile_path': None}, {'credit_id': '52fe44779251416c91011af3', 'department': 'Production', 'gender': 0, 'id': 111118, 'job': 'Executive Producer', 'name': 'Terry McMillan', 'profile_path': None}, {'credit_id': '52fe44779251416c91011ad5', 'department': 'Writing', 'gender': 0, 'id': 111118, 'job': 'Novel', 'name': 'Terry McMillan', 'profile_path': None}, {'credit_id': '52fe44779251416c91011b05', 'department': 'Sound', 'gender': 2, 'id': 1079697, 'job': 'Original Music Composer', 'name': 'Kenneth Edmonds', 'profile_path': '/2EqVBaDGRA0sHkEKagu2NxfP1Nm.jpg'}, {'credit_id': '52fe44779251416c91011aed', 'department': 'Production', 'gender': 0, 'id': 1087695, 'job': 'Producer', 'name': 'Caron K', 'profile_path': None}] 31357 + [{'cast_id': 1, 'character': 'George Banks', 'credit_id': '52fe44959251416c75039eb9', 'gender': 2, 'id': 67773, 'name': 'Steve Martin', 'order': 0, 'profile_path': '/rI2EMvkfKKPKa5z0nM2pFVBtUyO.jpg'}, {'cast_id': 2, 'character': 'Nina Banks', 'credit_id': '52fe44959251416c75039ebd', 'gender': 1, 'id': 3092, 'name': 'Diane Keaton', 'order': 1, 'profile_path': 
'/fzgUMnbOkxC6E3EFcYHWHFaiKyp.jpg'}, {'cast_id': 3, 'character': 'Franck Eggelhoffer', 'credit_id': '52fe44959251416c75039ec1', 'gender': 2, 'id': 519, 'name': 'Martin Short', 'order': 2, 'profile_path': '/oZQorXBjTxrdkTJFpoDwOcQ91ji.jpg'}, {'cast_id': 4, 'character': 'Annie Banks-MacKenzie', 'credit_id': '52fe44959251416c75039ec5', 'gender': 1, 'id': 70696, 'name': 'Kimberly Williams-Paisley', 'order': 3, 'profile_path': '/nVp4F4VFqVvjh6huOULUQoiAguY.jpg'}, {'cast_id': 13, 'character': 'Bryan MacKenzie', 'credit_id': '52fe44959251416c75039ef3', 'gender': 2, 'id': 59222, 'name': 'George Newbern', 'order': 4, 'profile_path': '/48Ouqe1g8QrZ6qjvap5NvhfKuly.jpg'}, {'cast_id': 14, 'character': 'Matty Banks', 'credit_id': '52fe44959251416c75039ef7', 'gender': 0, 'id': 18793, 'name': 'Kieran Culkin', 'order': 5, 'profile_path': '/swcqQCMREeGCk6FAxIfczGpFBys.jpg'}, {'cast_id': 15, 'character': 'Howard Weinstein', 'credit_id': '52fe44959251416c75039efb', 'gender': 2, 'id': 14592, 'name': 'BD Wong', 'order': 6, 'profile_path': '/o8JUV37KWNorHOabp3zyD756oE3.jpg'}, {'cast_id': 16, 'character': 'John MacKenzie', 'credit_id': '52fe44959251416c75039eff', 'gender': 2, 'id': 20906, 'name': 'Peter Michael Goetz', 'order': 7, 'profile_path': '/a2hLcCidETgwlVyQnYy4kXVKUcn.jpg'}, {'cast_id': 17, 'character': 'Joanna MacKenzie', 'credit_id': '52fe44959251416c75039f03', 'gender': 1, 'id': 54348, 'name': 'Kate McGregor-Stewart', 'order': 8, 'profile_path': '/vsvdAmMZgX85AvnVFd4jigOUipZ.jpg'}, {'cast_id': 18, 'character': 'Dr. Megan Eisenberg', 'credit_id': '52fe44959251416c75039f07', 'gender': 1, 'id': 209, 'name': 'Jane Adams', 'order': 9, 'profile_path': '/HbQfL01xmV1psnh0WvldIBzDg3.jpg'}, {'cast_id': 19, 'character': 'Mr. Habib', 'credit_id': '52fe44959251416c75039f0b', 'gender': 2, 'id': 26510, 'name': 'Eugene Levy', 'order': 10, 'profile_path': '/69IBiDjU1gSqtrcGOA7PA7aEYsc.jpg'}, {'cast_id': 20, 'character': 'Wife Mrs. 
Habib', 'credit_id': '57ffa654c3a3681552000277', 'gender': 1, 'id': 24358, 'name': 'Lori Alan', 'order': 11, 'profile_path': '/mNfJWzuaKgkIaK7CuirXOMosd2h.jpg'}] [{'credit_id': '52fe44959251416c75039ed7', 'department': 'Sound', 'gender': 2, 'id': 37, 'job': 'Original Music Composer', 'name': 'Alan Silvestri', 'profile_path': '/chEsfnDEtRmv1bfOaNAoVEzhCc6.jpg'}, {'credit_id': '52fe44959251416c75039ee9', 'department': 'Camera', 'gender': 2, 'id': 5506, 'job': 'Director of Photography', 'name': 'Elliot Davis', 'profile_path': None}, {'credit_id': '52fe44959251416c75039ecb', 'department': 'Writing', 'gender': 1, 'id': 17698, 'job': 'Screenplay', 'name': 'Nancy Meyers', 'profile_path': '/nMPHU06dnvVxEjjcnPCPUQgQ2Mp.jpg'}, {'credit_id': '52fe44959251416c75039edd', 'department': 'Production', 'gender': 1, 'id': 17698, 'job': 'Producer', 'name': 'Nancy Meyers', 'profile_path': '/nMPHU06dnvVxEjjcnPCPUQgQ2Mp.jpg'}, {'credit_id': '52fe44959251416c75039ed1', 'department': 'Writing', 'gender': 2, 'id': 26160, 'job': 'Screenplay', 'name': 'Albert Hackett', 'profile_path': None}, {'credit_id': '52fe44959251416c75039eef', 'department': 'Directing', 'gender': 2, 'id': 56106, 'job': 'Director', 'name': 'Charles Shyer', 'profile_path': '/hnWGd74CbmTcDCFQiJ8SYLazIXW.jpg'}, {'credit_id': '52fe44959251416c75039ee3', 'department': 'Editing', 'gender': 2, 'id': 68755, 'job': 'Editor', 'name': 'Adam Bernardi', 'profile_path': None}] 11862 + +null report (sorted): + null_count null_pct +cast 0 0.0 +crew 0 0.0 +id 0 0.0 + +exact duplicate rows: 37 +duplicate index numbers (showing up to 200): [1465, 9165, 9327, 12066, 13375, 15074, 15765, 16764, 20842, 20898, 21115, 21164, 21853, 22150, 23043, 24843, 25949, 25950, 25953, 25954, 25955, 25956, 25957, 25965, 25966, 25967, 25968, 25969, 25974, 25975, 28873, 29387, 33755, 40287, 44831, 44836, 45275] + cast crew id +1465 [{'cast_id': 5, 'character': 'Sophie II', 'credit_id': '52fe4a44c3a36847f81c4655', 'gender': 1, 'id': 680, 'name': 'Corinna 
Harfouch', 'order': 0, 'profile_path': '/oEXqVyUnBLP1JxRVhpT6B3pIe37.jpg'}, {'cast_id': 6, 'character': 'Sophie I', 'credit_id': '52fe4a44c3a36847f81c4659', 'gender': 1, 'id': 8790, 'name': 'Meret Becker', 'order': 1, 'profile_path': '/104ahKJHYJtZAPZHHJFAoJQkuuv.jpg'}, {'cast_id': 7, 'character': 'Konrad', 'credit_id': '536a992bc3a368124a0085a9', 'gender': 2, 'id': 11951, 'name': 'August Zirner', 'order': 2, 'profile_path': '/51Bgab3YLjCFIbJoFiXrSK95BXI.jpg'}, {'cast_id': 8, 'character': 'Barbara', 'credit_id': '536a993bc3a3681231008608', 'gender': 1, 'id': 10627, 'name': 'Eva Mattes', 'order': 3, 'profile_path': '/9qUzPuHrQ2DV64CycNJMUFq4YvB.jpg'}, {'cast_id': 9, 'character': 'Müller', 'credit_id': '536a9948c3a3681226008272', 'gender': 2, 'id': 1865, 'name': 'Hark Bohm', 'order': 4, 'profile_path': '/ptmIE7lSIiCS6mlXHOz3WgT7D43.jpg'}, {'cast_id': 10, 'character': 'Professor Lorenz', 'credit_id': '536a9956c3a368121200830e', 'gender': 2, 'id': 2311, 'name': 'Otto Sander', 'order': 5, 'profile_path': '/AurmCt7XRXRMFJ4xVMwTVkL5hg5.jpg'}, {'cast_id': 11, 'character': 'Elisabeth', 'credit_id': '536a996ec3a3681226008278', 'gender': 1, 'id': 39908, 'name': 'Ulrike Krumbiegel', 'order': 6, 'profile_path': '/jf9gKA0GfdZ56PwxcthQiZC6BZ0.jpg'}, {'cast_id': 12, 'character': 'Harald I', 'credit_id': '5647c8479251413ae1004b64', 'gender': 0, 'id': 48370, 'name': 'Pierre Besson', 'order': 7, 'profile_path': '/4mJIPaY0gXANBHkwWIX3d8a1jWj.jpg'}, {'cast_id': 13, 'character': 'Sophies Tante', 'credit_id': '5647c8619251413ae1004b67', 'gender': 0, 'id': 10258, 'name': 'Tina Engel', 'order': 8, 'profile_path': '/6R3jDyKN3LETdfnCPh5n7tYskHT.jpg'}] [{'credit_id': '52fe4a44c3a36847f81c463f', 'department': 'Directing', 'gender': 1, 'id': 39298, 'job': 'Director', 'name': 'Margarethe von Trotta', 'profile_path': '/inHCKINOzP9gby9zCIMm902i1ln.jpg'}, {'credit_id': '52fe4a44c3a36847f81c4645', 'department': 'Writing', 'gender': 2, 'id': 169302, 'job': 'Writer', 'name': 'Peter Schneider', 
'profile_path': None}, {'credit_id': '52fe4a44c3a36847f81c464b', 'department': 'Writing', 'gender': 0, 'id': 1037349, 'job': 'Writer', 'name': 'Felice Laudadio', 'profile_path': None}, {'credit_id': '52fe4a44c3a36847f81c4651', 'department': 'Writing', 'gender': 1, 'id': 39298, 'job': 'Writer', 'name': 'Margarethe von Trotta', 'profile_path': '/inHCKINOzP9gby9zCIMm902i1ln.jpg'}] 105045 +9165 [{'cast_id': 11, 'character': 'Jef Costello', 'credit_id': '52fe440ac3a36847f807ee17', 'gender': 2, 'id': 15135, 'name': 'Alain Delon', 'order': 0, 'profile_path': '/jNXHnLspNoh1ZOH7gwCkMrjLA7R.jpg'}, {'cast_id': 12, 'character': 'Superintendant', 'credit_id': '52fe440ac3a36847f807ee1b', 'gender': 2, 'id': 27440, 'name': 'François Périer', 'order': 1, 'profile_path': '/nYEA81FGfwT0O9obLX6gysC7VeO.jpg'}, {'cast_id': 13, 'character': 'Jane Lagrange', 'credit_id': '52fe440ac3a36847f807ee1f', 'gender': 1, 'id': 43814, 'name': 'Nathalie Delon', 'order': 2, 'profile_path': '/8rQzGV7ITkHYdG10RgGKXZsvf3n.jpg'}, {'cast_id': 14, 'character': 'Valerie, die Pianistin', 'credit_id': '52fe440ac3a36847f807ee23', 'gender': 1, 'id': 43815, 'name': 'Cathy Rosier', 'order': 3, 'profile_path': '/6kcDIq4ALG9iS6aqYbotYJ11Coq.jpg'}, {'cast_id': 15, 'character': 'La jeune fille du vestiaire', 'credit_id': '52fe440ac3a36847f807ee27', 'gender': 1, 'id': 20945, 'name': 'Catherine Jourdan', 'order': 4, 'profile_path': '/hLPxSkGrKjs6uN32AR8KaLNPJIa.jpg'}, {'cast_id': 17, 'character': "L'homme de la passerelle", 'credit_id': '57acb5dbc3a3682f18000394', 'gender': 0, 'id': 1664777, 'name': 'Jacques Leroy', 'order': 5, 'profile_path': None}, {'cast_id': 18, 'character': 'Wiener', 'credit_id': '57acb5f0925141473600157e', 'gender': 2, 'id': 35216, 'name': 'Michel Boisrond', 'order': 6, 'profile_path': '/djRMKctZJkSFVITNESlUe3KN6ma.jpg'}, {'cast_id': 19, 'character': 'Le barman', 'credit_id': '57acb6009251417f3e00042d', 'gender': 2, 'id': 277526, 'name': 'Robert Favart', 'order': 7, 'profile_path': 
'/suDSbW2IiphFfZe53vUa5bjRrdP.jpg'}, {'cast_id': 20, 'character': 'Olivier Rey', 'credit_id': '57acb60dc3a3682f180003af', 'gender': 2, 'id': 54403, 'name': 'Jean-Pierre Posier', 'order': 8, 'profile_path': None}, {'cast_id': 21, 'character': '1er inspecteur', 'credit_id': '57acb62fc3a3681e02002900', 'gender': 2, 'id': 1117806, 'name': 'Roger Fradet', 'order': 9, 'profile_path': None}, {'cast_id': 22, 'character': '2ème inspecteur', 'credit_id': '57acb644925141478000178f', 'gender': 2, 'id': 32372, 'name': 'Carlo Nell', 'order': 10, 'profile_path': '/rifzXd7xghJog2CwevsLmIuwF4N.jpg'}, {'cast_id': 23, 'character': '3ème inspecteur', 'credit_id': '57acb6519251414780001798', 'gender': 2, 'id': 1664779, 'name': 'Robert Rondo', 'order': 11, 'profile_path': None}, {'cast_id': 24, 'character': 'Le garagiste', 'credit_id': '57acb65dc3a3682d1c000467', 'gender': 2, 'id': 1664780, 'name': 'André Salgues', 'order': 12, 'profile_path': None}, {'cast_id': 25, 'character': 'Policier - chauffeur de taxi', 'credit_id': '57acb669c3a3682dc6000410', 'gender': 2, 'id': 44408, 'name': 'André Thorent', 'order': 13, 'profile_path': '/aF9ZRcjwYZju3QXUZIZf6EUogGT.jpg'}, {'cast_id': 26, 'character': 'Policier speaker', 'credit_id': '57acb6b0c3a3681daa002d64', 'gender': 2, 'id': 68371, 'name': 'Jacques Deschamps', 'order': 14, 'profile_path': '/keZBlDTA5kczbNVG47C3VNXViEe.jpg'}, {'cast_id': 27, 'character': 'Damolini', 'credit_id': '57acb79b9251417f3b0004ca', 'gender': 2, 'id': 326033, 'name': 'Georges Casati', 'order': 15, 'profile_path': None}, {'cast_id': 28, 'character': 'Garcia', 'credit_id': '57acb7afc3a3681e020029a0', 'gender': 2, 'id': 112760, 'name': 'Jacques Léonard', 'order': 16, 'profile_path': None}, {'cast_id': 29, 'character': '1er Policier de la visite nocturne', 'credit_id': '57acb7be9251411263002919', 'gender': 2, 'id': 1664786, 'name': 'Pierre Vaudier', 'order': 17, 'profile_path': None}, {'cast_id': 30, 'character': '2ème Policier de la visite nocturne', 'credit_id': 
'57acb7cdc3a368646c0016ff', 'gender': 2, 'id': 1664787, 'name': 'Maurice Magalon', 'order': 18, 'profile_path': None}, {'cast_id': 31, 'character': "Maître d'hôtel", 'credit_id': '57acb7de9251417f3e0004dd', 'gender': 2, 'id': 1100818, 'name': 'Gaston Meunier', 'order': 19, 'profile_path': None}, {'cast_id': 32, 'character': '1er client Night-Club', 'credit_id': '57acb7f2c3a368646c001710', 'gender': 2, 'id': 1326575, 'name': 'Jean Gold', 'order': 20, 'profile_path': '/4dSlBQuwBL9Sz3gyB6GTNIuBiAa.jpg'}, {'cast_id': 33, 'character': '2ème client Night-Club', 'credit_id': '57acb820c3a3682f1800047e', 'gender': 2, 'id': 1664788, 'name': 'Georges Billy', 'order': 21, 'profile_path': None}, {'cast_id': 34, 'character': 'Un joueur de poker', 'credit_id': '57acb83e9251414736001659', 'gender': 2, 'id': 1664789, 'name': 'Ari Aricardi', 'order': 22, 'profile_path': None}, {'cast_id': 35, 'character': 'Un joueur de poker', 'credit_id': '57acb84c9251417f3e000502', 'gender': 2, 'id': 54395, 'name': 'Guy Bonnafoux', 'order': 23, 'profile_path': '/2nONpprBKcyyGfHuW1xH8vL1VRe.jpg'}, {'cast_id': 36, 'character': 'Un inspecteur', 'credit_id': '57acb85ac3a3681f2e000959', 'gender': 2, 'id': 1174961, 'name': 'Humberto Catalano', 'order': 24, 'profile_path': '/zxoA4WFGmBJ70Qu4rl3UgfymOcA.jpg'}, {'cast_id': 37, 'character': 'Sosie Jef', 'credit_id': '57acb8699251417f0d0004f1', 'gender': 2, 'id': 1664790, 'name': 'Carl Lechner', 'order': 25, 'profile_path': None}, {'cast_id': 38, 'character': 'La jeune fille au chewing-gum', 'credit_id': '57acb874c3a3682dc60004e6', 'gender': 1, 'id': 1664791, 'name': 'Maria Maneva', 'order': 26, 'profile_path': None}] [{'credit_id': '52fe440ac3a36847f807ee01', 'department': 'Camera', 'gender': 2, 'id': 1657, 'job': 'Director of Photography', 'name': 'Henri Decaë', 'profile_path': None}, {'credit_id': '52fe440ac3a36847f807edef', 'department': 'Production', 'gender': 2, 'id': 2577, 'job': 'Producer', 'name': 'Raymond Borderie', 'profile_path': 
'/3OAqxzgb040FDscICkbK2hYpAzB.jpg'}, {'credit_id': '52fe440ac3a36847f807ede9', 'department': 'Writing', 'gender': 2, 'id': 3831, 'job': 'Screenplay', 'name': 'Jean-Pierre Melville', 'profile_path': '/f0wFxF6dLUdO6a9AIhcV51prFEK.jpg'}, {'credit_id': '52fe440ac3a36847f807eddd', 'department': 'Directing', 'gender': 2, 'id': 3831, 'job': 'Director', 'name': 'Jean-Pierre Melville', 'profile_path': '/f0wFxF6dLUdO6a9AIhcV51prFEK.jpg'}, {'credit_id': '52fe440ac3a36847f807ee07', 'department': 'Production', 'gender': 2, 'id': 3831, 'job': 'Producer', 'name': 'Jean-Pierre Melville', 'profile_path': '/f0wFxF6dLUdO6a9AIhcV51prFEK.jpg'}, {'credit_id': '52fe440ac3a36847f807edfb', 'department': 'Sound', 'gender': 2, 'id': 24775, 'job': 'Music', 'name': 'François de Roubaix', 'profile_path': '/kylSiLpGuVcZxxOKLhJmV6YRIBD.jpg'}, {'credit_id': '52fe440ac3a36847f807ede3', 'department': 'Writing', 'gender': 0, 'id': 43810, 'job': 'Screenplay', 'name': 'Georges Pellegrin', 'profile_path': None}, {'credit_id': '52fe440ac3a36847f807ee2d', 'department': 'Writing', 'gender': 0, 'id': 43810, 'job': 'Other', 'name': 'Georges Pellegrin', 'profile_path': None}, {'credit_id': '52fe440ac3a36847f807edf5', 'department': 'Production', 'gender': 0, 'id': 43811, 'job': 'Producer', 'name': 'Eugène Lépicier', 'profile_path': None}, {'credit_id': '52fe440ac3a36847f807ee0d', 'department': 'Editing', 'gender': 0, 'id': 43812, 'job': 'Editor', 'name': 'Monique Bonnot', 'profile_path': None}, {'credit_id': '52fe440ac3a36847f807ee13', 'department': 'Editing', 'gender': 0, 'id': 43813, 'job': 'Editor', 'name': 'Yolande Maurette', 'profile_path': None}, {'credit_id': '57accb31c3a3682d1c000e34', 'department': 'Writing', 'gender': 0, 'id': 1664823, 'job': 'Novel', 'name': 'Joan McLeod', 'profile_path': None}] 5511 +9327 [{'cast_id': 1, 'character': 'Lafcadia - Warrior', 'credit_id': '52fe4465c3a368484e020913', 'gender': 2, 'id': 76793, 'name': 'Irrfan Khan', 'order': 0, 'profile_path': 
'/9O71WSILj1af9smwuN44nGd198Q.jpg'}, {'cast_id': 2, 'character': "Katiba, Warrior's Son", 'credit_id': '52fe4465c3a368484e020917', 'gender': 0, 'id': 141755, 'name': 'Puru Chibber', 'order': 1, 'profile_path': None}, {'cast_id': 4, 'character': 'Biswas', 'credit_id': '52fe4465c3a368484e020921', 'gender': 0, 'id': 1140067, 'name': 'Aino Annuddin', 'order': 2, 'profile_path': None}, {'cast_id': 5, 'character': 'Warrior', 'credit_id': '52fe4465c3a368484e020925', 'gender': 0, 'id': 1140069, 'name': 'Manoj Mishra', 'order': 3, 'profile_path': None}, {'cast_id': 6, 'character': 'Warrior', 'credit_id': '52fe4465c3a368484e020929', 'gender': 0, 'id': 1140071, 'name': 'Nanhe Khan', 'order': 4, 'profile_path': None}, {'cast_id': 7, 'character': 'Warrior', 'credit_id': '52fe4465c3a368484e02092d', 'gender': 0, 'id': 1140074, 'name': 'Chander Singh', 'order': 5, 'profile_path': None}, {'cast_id': 8, 'character': 'Warrior', 'credit_id': '52fe4465c3a368484e020931', 'gender': 0, 'id': 1140075, 'name': 'Hemant Maahaor', 'order': 6, 'profile_path': None}, {'cast_id': 9, 'character': 'Rabia', 'credit_id': '52fe4465c3a368484e020935', 'gender': 1, 'id': 987703, 'name': 'Mandakini Goswami', 'order': 7, 'profile_path': None}, {'cast_id': 10, 'character': 'The girl', 'credit_id': '52fe4465c3a368484e020939', 'gender': 1, 'id': 987704, 'name': 'Sunita Sharma', 'order': 8, 'profile_path': None}, {'cast_id': 11, 'character': 'Clerk', 'credit_id': '52fe4465c3a368484e02093d', 'gender': 0, 'id': 1140076, 'name': 'Shaukat Baig', 'order': 9, 'profile_path': None}, {'cast_id': 12, 'character': 'Tarang village headman', 'credit_id': '52fe4465c3a368484e020941', 'gender': 0, 'id': 1140077, 'name': 'Gori Shanker', 'order': 10, 'profile_path': None}, {'cast_id': 13, 'character': 'Blacksmith', 'credit_id': '52fe4465c3a368484e020945', 'gender': 0, 'id': 1140078, 'name': 'Prabhuram', 'order': 11, 'profile_path': None}, {'cast_id': 14, 'character': "Blacksmith's son", 'credit_id': '52fe4465c3a368484e020949', 
'gender': 0, 'id': 1140079, 'name': 'Wagaram', 'order': 12, 'profile_path': None}, {'cast_id': 15, 'character': 'Quarrey foreman', 'credit_id': '52fe4465c3a368484e02094d', 'gender': 0, 'id': 1140080, 'name': 'Ajai Rohilla', 'order': 13, 'profile_path': None}, {'cast_id': 16, 'character': 'Riaz - Thief', 'credit_id': '52fe4465c3a368484e020951', 'gender': 2, 'id': 987705, 'name': 'Noor Mani', 'order': 14, 'profile_path': None}, {'cast_id': 19, 'character': 'Dhaba stall owner', 'credit_id': '56bf988fc3a368180a00a6b6', 'gender': 0, 'id': 1072011, 'name': 'Sitaram Panchal', 'order': 15, 'profile_path': '/tWilglaEjxh815qn4YSJ3GWTomE.jpg'}, {'cast_id': 20, 'character': 'Dhaba stall man', 'credit_id': '56bf98a392514170aa0036a1', 'gender': 0, 'id': 1576979, 'name': 'Chander Prakash Vyas', 'order': 16, 'profile_path': None}, {'cast_id': 21, 'character': 'Dhaba stall man', 'credit_id': '56bf98b5c3a36817fd00945b', 'gender': 0, 'id': 1576980, 'name': 'Sanjal', 'order': 17, 'profile_path': None}, {'cast_id': 22, 'character': 'Lord', 'credit_id': '56bf98ca9251410c51000197', 'gender': 0, 'id': 223168, 'name': 'Anupam Shyam', 'order': 18, 'profile_path': '/cFhjos7fhG5iqYJqmd8RcI1MfGu.jpg'}, {'cast_id': 23, 'character': 'Market trader', 'credit_id': '56bf98dec3a36817f400a012', 'gender': 0, 'id': 1113617, 'name': 'Amit Kumar', 'order': 19, 'profile_path': None}, {'cast_id': 24, 'character': 'Blind woman', 'credit_id': '56bf995bc3a368180a00a6c5', 'gender': 0, 'id': 1576981, 'name': 'Damayanti Marfatia', 'order': 20, 'profile_path': None}, {'cast_id': 25, 'character': 'Cart driver', 'credit_id': '56bf996dc3a3681806009471', 'gender': 0, 'id': 1576982, 'name': 'Trilok Singh', 'order': 21, 'profile_path': None}, {'cast_id': 26, 'character': 'Restaurant owner', 'credit_id': '56bf9980c3a36817e4001dc5', 'gender': 0, 'id': 1576983, 'name': 'Pushpa Negi', 'order': 22, 'profile_path': None}, {'cast_id': 27, 'character': 'Restaurant girl', 'credit_id': '56bf99939251410bb00018a8', 'gender': 0, 
'id': 1576984, 'name': 'Karuna Sarah Davis', 'order': 23, 'profile_path': None}, {'cast_id': 28, 'character': 'Rude customer', 'credit_id': '56bf99a3925141699c0007bc', 'gender': 0, 'id': 1576985, 'name': 'Rakesh Mehra', 'order': 24, 'profile_path': None}, {'cast_id': 29, 'character': "Lord's wife", 'credit_id': '56bf99b6c3a36817fd00946d', 'gender': 0, 'id': 1576986, 'name': 'Anuradha Advanti', 'order': 25, 'profile_path': None}, {'cast_id': 30, 'character': 'The Warrior (voice)', 'credit_id': '56bf99c992514170aa0036be', 'gender': 2, 'id': 128154, 'name': 'Ismail Bashey', 'order': 26, 'profile_path': '/cjtEReor3Ks1bxeNKqzm7jTN4Lu.jpg'}, {'cast_id': 31, 'character': 'Singer', 'credit_id': '56bf99e09251410c510001b0', 'gender': 2, 'id': 584639, 'name': 'Madhu', 'order': 27, 'profile_path': '/hzvZzChoSQzlE79CeXM1VIWj1qe.jpg'}] [{'credit_id': '52fe4465c3a368484e02091d', 'department': 'Directing', 'gender': 0, 'id': 55251, 'job': 'Director', 'name': 'Asif Kapadia', 'profile_path': '/tJzwAft3zkDSRqpBsNg6ScpawQG.jpg'}, {'credit_id': '56bf955bc3a368180a00a63c', 'department': 'Writing', 'gender': 0, 'id': 55251, 'job': 'Writer', 'name': 'Asif Kapadia', 'profile_path': '/tJzwAft3zkDSRqpBsNg6ScpawQG.jpg'}, {'credit_id': '56bf95839251410bb0001840', 'department': 'Writing', 'gender': 2, 'id': 55252, 'job': 'Writer', 'name': 'Tim Miller', 'profile_path': '/ebnkxctPa86d2AaCX7MDNNxAhbJ.jpg'}] 23305 +12066 [{'cast_id': 1000, 'character': 'Martha', 'credit_id': '52fe46199251416c7506d0ed', 'gender': 0, 'id': 567601, 'name': 'Debbie Doebereiner', 'order': 0, 'profile_path': None}, {'cast_id': 1001, 'character': "Martha's Father", 'credit_id': '52fe46199251416c7506d0f1', 'gender': 0, 'id': 567602, 'name': 'Omar Cowan', 'order': 1, 'profile_path': None}, {'cast_id': 1002, 'character': 'Kyle (as Dustin Ashley)', 'credit_id': '52fe46199251416c7506d0f5', 'gender': 0, 'id': 567603, 'name': 'Dustin James Ashley', 'order': 2, 'profile_path': None}, {'cast_id': 1003, 'character': 'Bakery 
Shopkeeper', 'credit_id': '52fe46199251416c7506d0f9', 'gender': 0, 'id': 567604, 'name': 'Phyllis Workman', 'order': 3, 'profile_path': None}] [{'credit_id': '52fe46199251416c7506d0ff', 'department': 'Directing', 'gender': 2, 'id': 1884, 'job': 'Director', 'name': 'Steven Soderbergh', 'profile_path': '/dxdMRsAosaGlMRd7EMmm9lrXXsW.jpg'}, {'credit_id': '52fe46199251416c7506d10b', 'department': 'Camera', 'gender': 2, 'id': 1884, 'job': 'Director of Photography', 'name': 'Steven Soderbergh', 'profile_path': '/dxdMRsAosaGlMRd7EMmm9lrXXsW.jpg'}, {'credit_id': '52fe46199251416c7506d111', 'department': 'Editing', 'gender': 2, 'id': 1884, 'job': 'Editor', 'name': 'Steven Soderbergh', 'profile_path': '/dxdMRsAosaGlMRd7EMmm9lrXXsW.jpg'}, {'credit_id': '52fe46199251416c7506d105', 'department': 'Writing', 'gender': 2, 'id': 150180, 'job': 'Writer', 'name': 'Coleman Hough', 'profile_path': None}] 14788 +13375 [{'cast_id': 1, 'character': 'Pekka', 'credit_id': '52fe4ac79251416c750edd1d', 'gender': 2, 'id': 108471, 'name': 'Petteri Summanen', 'order': 0, 'profile_path': '/l8qfdQLmxGqetZcOXsnP3Ojs32B.jpg'}, {'cast_id': 13, 'character': 'Ismo Valinto', 'credit_id': '592ef81792514130de01097e', 'gender': 2, 'id': 79764, 'name': 'Ismo Kallio', 'order': 1, 'profile_path': None}, {'cast_id': 3, 'character': 'Arto Suominen', 'credit_id': '52fe4ac89251416c750edd25', 'gender': 0, 'id': 108477, 'name': 'Eppu Salminen', 'order': 2, 'profile_path': '/56xNN1JCtMJ4unrtSUEXz1DxtCM.jpg'}, {'cast_id': 4, 'character': 'Anne Hartela', 'credit_id': '52fe4ac89251416c750edd29', 'gender': 1, 'id': 85386, 'name': 'Irina Björklund', 'order': 3, 'profile_path': '/kfPRkNXKhcD3ufp0XMrjkQONFoT.jpg'}, {'cast_id': 5, 'character': 'Aleksi Partio', 'credit_id': '52fe4ac89251416c750edd2d', 'gender': 2, 'id': 124285, 'name': 'Hannu-Pekka Björkman', 'order': 4, 'profile_path': '/6gHHzIqSGCQdoaWy85ZKDjxDpRz.jpg'}, {'cast_id': 6, 'character': 'Laura Koskimies', 'credit_id': '52fe4ac89251416c750edd31', 'gender': 0, 
'id': 108472, 'name': 'Jenni Banerjee', 'order': 5, 'profile_path': '/ysFIB6ejEAhp6VwpTbNKGSldzBb.jpg'}, {'cast_id': 7, 'character': 'Juha Pasanen', 'credit_id': '52fe4ac89251416c750edd35', 'gender': 2, 'id': 108476, 'name': 'Mikko Leppilampi', 'order': 6, 'profile_path': '/jkeUShOz01YAkmRO0AmB9v9qlfB.jpg'}, {'cast_id': 8, 'character': 'Hanna Kajaste', 'credit_id': '52fe4ac89251416c750edd39', 'gender': 0, 'id': 108473, 'name': 'Lena Meriläinen', 'order': 7, 'profile_path': None}, {'cast_id': 9, 'character': 'Mari Koski', 'credit_id': '52fe4ac89251416c750edd3d', 'gender': 1, 'id': 108475, 'name': 'Mari Perankoski', 'order': 8, 'profile_path': '/cxwu9zwvaqlRDKOhv0ZL0T3auFF.jpg'}, {'cast_id': 10, 'character': 'Risto Vierikko', 'credit_id': '52fe4ac89251416c750edd41', 'gender': 0, 'id': 89512, 'name': 'Risto Kaskilahti', 'order': 9, 'profile_path': None}] [{'credit_id': '52fe4ac89251416c750edd47', 'department': 'Directing', 'gender': 2, 'id': 108481, 'job': 'Director', 'name': 'JP Siili', 'profile_path': None}, {'credit_id': '52fe4ac89251416c750edd4d', 'department': 'Writing', 'gender': 2, 'id': 108481, 'job': 'Writer', 'name': 'JP Siili', 'profile_path': None}] 141971 +15074 [{'cast_id': 1, 'character': 'Catherine Barkley', 'credit_id': '52fe444ac3a368484e01aad5', 'gender': 0, 'id': 47439, 'name': 'Helen Hayes', 'order': 0, 'profile_path': '/6QJDTvIT0v5E9pR1rgPtq59Ej8.jpg'}, {'cast_id': 2, 'character': 'Lieutenant Frederic Henry', 'credit_id': '52fe444ac3a368484e01aad9', 'gender': 2, 'id': 4068, 'name': 'Gary Cooper', 'order': 1, 'profile_path': '/zVUK71x5IoBaJ5H9dTgE2CRmLKy.jpg'}, {'cast_id': 3, 'character': 'Major Rinaldi', 'credit_id': '52fe444ac3a368484e01aadd', 'gender': 2, 'id': 14563, 'name': 'Adolphe Menjou', 'order': 2, 'profile_path': '/jAZRVhDGvEfRfJgGQCGk1GXMp1C.jpg'}, {'cast_id': 4, 'character': 'Helen Ferguson', 'credit_id': '52fe444ac3a368484e01aae1', 'gender': 0, 'id': 48960, 'name': 'Mary Philips', 'order': 3, 'profile_path': 
'/A0s0J8W0rAXBrJYKXNiHEuldlTK.jpg'}, {'cast_id': 8, 'character': 'Priest', 'credit_id': '52fe444ac3a368484e01aaf7', 'gender': 2, 'id': 99347, 'name': 'Jack La Rue', 'order': 4, 'profile_path': '/hWOwpkNevqBwp2mrjMCrLrxvPsi.jpg'}, {'cast_id': 9, 'character': 'Head Nurse', 'credit_id': '52fe444ac3a368484e01aafb', 'gender': 0, 'id': 30159, 'name': 'Blanche Friderici', 'order': 5, 'profile_path': '/wIwVP4Y505MiN8qesi52Al8yUTC.jpg'}, {'cast_id': 10, 'character': 'Miss Van Campen', 'credit_id': '52fe444ac3a368484e01aaff', 'gender': 1, 'id': 81941, 'name': 'Mary Forbes', 'order': 6, 'profile_path': '/S3g8mvqSDng5oQCfwkELQ68kw4.jpg'}, {'cast_id': 11, 'character': 'British Major', 'credit_id': '52fe444ac3a368484e01ab03', 'gender': 0, 'id': 95757, 'name': 'Gilbert Emery', 'order': 7, 'profile_path': '/gKJeGDvOVIHqmtjdED9X6hfYsK.jpg'}, {'cast_id': 34, 'character': 'Giulio - Hospital Porter (uncredited)', 'credit_id': '598072eec3a368103901050d', 'gender': 2, 'id': 1016595, 'name': 'Agostino Borgato', 'order': 8, 'profile_path': None}] [{'credit_id': '56d73d3e9251414291002436', 'department': 'Camera', 'gender': 2, 'id': 3148, 'job': 'Director of Photography', 'name': 'Charles Lang', 'profile_path': '/1k74XckQqxOwdPYvhvhEdZ9jl4l.jpg'}, {'credit_id': '56d73d6ec3a3681e3601a687', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 7652, 'job': 'Costume Design', 'name': 'Travis Banton', 'profile_path': None}, {'credit_id': '56d73d66c3a36870ac007881', 'department': 'Art', 'gender': 2, 'id': 8622, 'job': 'Art Direction', 'name': 'Hans Dreier', 'profile_path': '/pyVz3McZwtOFOVjx3jSqawd45Zj.jpg'}, {'credit_id': '56d73da192514111eb005938', 'department': 'Sound', 'gender': 2, 'id': 9107, 'job': 'Sound', 'name': 'Harold Lewis', 'profile_path': None}, {'credit_id': '56d73d09925141719e005298', 'department': 'Sound', 'gender': 0, 'id': 13336, 'job': 'Original Music Composer', 'name': 'W. 
Franke Harling', 'profile_path': None}, {'credit_id': '52fe444ac3a368484e01aae7', 'department': 'Directing', 'gender': 2, 'id': 14855, 'job': 'Director', 'name': 'Frank Borzage', 'profile_path': '/ywsES5vf5uH4GelIDmaJWJAhdOG.jpg'}, {'credit_id': '56d73d47c3a3681e3601a67f', 'department': 'Editing', 'gender': 2, 'id': 14961, 'job': 'Editor', 'name': 'Otho Lovering', 'profile_path': None}, {'credit_id': '56d73d11c3a3681e3c01d459', 'department': 'Sound', 'gender': 2, 'id': 29810, 'job': 'Original Music Composer', 'name': 'Bernhard Kaun', 'profile_path': None}, {'credit_id': '56d73d19925141391f0072fa', 'department': 'Sound', 'gender': 0, 'id': 30256, 'job': 'Original Music Composer', 'name': 'John Leipold', 'profile_path': None}, {'credit_id': '52fe444ac3a368484e01aaed', 'department': 'Writing', 'gender': 2, 'id': 38233, 'job': 'Novel', 'name': 'Ernest Hemingway', 'profile_path': '/xchDU31q3QZqhS2mp3mqpBYd0kS.jpg'}, {'credit_id': '56d73d5fc3a3681e3601a683', 'department': 'Art', 'gender': 0, 'id': 47077, 'job': 'Art Direction', 'name': 'Roland Anderson', 'profile_path': None}, {'credit_id': '56d73ce1c3a3682cef009918', 'department': 'Writing', 'gender': 2, 'id': 86404, 'job': 'Screenplay', 'name': 'Oliver H.P. Garrett', 'profile_path': None}, {'credit_id': '52fe444ac3a368484e01aaf3', 'department': 'Writing', 'gender': 2, 'id': 89045, 'job': 'Screenplay', 'name': 'Benjamin Glazer', 'profile_path': None}, {'credit_id': '56d73cf6c3a3681e3601a676', 'department': 'Production', 'gender': 2, 'id': 89045, 'job': 'Associate Producer', 'name': 'Benjamin Glazer', 'profile_path': None}, {'credit_id': '56d73ceec3a36870ac007872', 'department': 'Production', 'gender': 0, 'id': 95752, 'job': 'Associate Producer', 'name': 'Edward A. 
Blatt', 'profile_path': None}, {'credit_id': '56d73d30c3a3681e50019324', 'department': 'Sound', 'gender': 0, 'id': 121115, 'job': 'Original Music Composer', 'name': 'Milan Roder', 'profile_path': None}, {'credit_id': '56d73d50925141391f007303', 'department': 'Editing', 'gender': 0, 'id': 585135, 'job': 'Editor', 'name': 'George Nichols Jr.', 'profile_path': None}, {'credit_id': '56d73d2992514112cd0037c6', 'department': 'Sound', 'gender': 2, 'id': 998190, 'job': 'Original Music Composer', 'name': 'Ralph Rainger', 'profile_path': None}, {'credit_id': '56d73d0192514111eb005921', 'department': 'Sound', 'gender': 2, 'id': 1092864, 'job': 'Original Music Composer', 'name': 'Herman Hand', 'profile_path': None}, {'credit_id': '56d73d7892514111eb00592f', 'department': 'Directing', 'gender': 2, 'id': 1456433, 'job': 'Assistant Director', 'name': 'Lew Borzage', 'profile_path': None}, {'credit_id': '56d73d8492514111eb005932', 'department': 'Directing', 'gender': 0, 'id': 1469564, 'job': 'Assistant Director', 'name': 'Charles Griffin', 'profile_path': None}, {'credit_id': '56d73d21925141391f0072fe', 'department': 'Sound', 'gender': 2, 'id': 1532469, 'job': 'Original Music Composer', 'name': 'Paul Marquardt', 'profile_path': None}, {'credit_id': '56d73d57c3a3681e4a01a17b', 'department': 'Production', 'gender': 0, 'id': 1532472, 'job': 'Casting', 'name': 'Fred A. 
Datig', 'profile_path': None}, {'credit_id': '56d73d8c92514118840064a6', 'department': 'Directing', 'gender': 0, 'id': 1533674, 'job': 'Assistant Director', 'name': 'Arthur Jacobson', 'profile_path': None}, {'credit_id': '56d73d99925141391f00730f', 'department': 'Sound', 'gender': 0, 'id': 1585484, 'job': 'Sound', 'name': 'Franklin Hansen', 'profile_path': None}] 22649 +15765 [{'cast_id': 4, 'character': '', 'credit_id': '5725a60ac3a36833b4001a90', 'gender': 0, 'id': 1613567, 'name': 'Sima Mobarak-Shahi', 'order': 1, 'profile_path': None}, {'cast_id': 5, 'character': '', 'credit_id': '5725a612c3a36833b4001a96', 'gender': 0, 'id': 1613568, 'name': 'Shayesteh Irani', 'order': 2, 'profile_path': None}, {'cast_id': 6, 'character': '', 'credit_id': '5725a61bc3a3683841000826', 'gender': 0, 'id': 1613569, 'name': 'Ayda Sadeqi', 'order': 3, 'profile_path': None}, {'cast_id': 7, 'character': '', 'credit_id': '5725a62492514122a90060d0', 'gender': 0, 'id': 1613570, 'name': 'Golnaz Farmani', 'order': 4, 'profile_path': None}, {'cast_id': 8, 'character': '', 'credit_id': '5725a636c3a3681c2600223f', 'gender': 0, 'id': 1613571, 'name': 'Mahnaz Zabihi', 'order': 5, 'profile_path': None}, {'cast_id': 9, 'character': '', 'credit_id': '5725a63f9251411a1400465d', 'gender': 0, 'id': 1613572, 'name': 'Nazanin Sediq-zadeh', 'order': 6, 'profile_path': None}] [{'credit_id': '52fe454f9251416c75052209', 'department': 'Directing', 'gender': 2, 'id': 120229, 'job': 'Director', 'name': 'Jafar Panahi', 'profile_path': '/a8HN99enx3plYkpb1UK1Jl6qnmS.jpg'}, {'credit_id': '52fe454f9251416c7505220f', 'department': 'Writing', 'gender': 2, 'id': 120229, 'job': 'Writer', 'name': 'Jafar Panahi', 'profile_path': '/a8HN99enx3plYkpb1UK1Jl6qnmS.jpg'}, {'credit_id': '52fe454f9251416c75052215', 'department': 'Writing', 'gender': 0, 'id': 129890, 'job': 'Writer', 'name': 'Shadmehr Rastin', 'profile_path': None}, {'credit_id': '591d891dc3a3687a6401eaa3', 'department': 'Crew', 'gender': 0, 'id': 1819067, 'job': 
'Cinematography', 'name': 'Rami Agami', 'profile_path': None}] 13209 +16764 [{'cast_id': 1, 'character': 'Pekka', 'credit_id': '52fe4ac79251416c750edd1d', 'gender': 2, 'id': 108471, 'name': 'Petteri Summanen', 'order': 0, 'profile_path': '/l8qfdQLmxGqetZcOXsnP3Ojs32B.jpg'}, {'cast_id': 13, 'character': 'Ismo Valinto', 'credit_id': '592ef81792514130de01097e', 'gender': 2, 'id': 79764, 'name': 'Ismo Kallio', 'order': 1, 'profile_path': None}, {'cast_id': 3, 'character': 'Arto Suominen', 'credit_id': '52fe4ac89251416c750edd25', 'gender': 0, 'id': 108477, 'name': 'Eppu Salminen', 'order': 2, 'profile_path': '/56xNN1JCtMJ4unrtSUEXz1DxtCM.jpg'}, {'cast_id': 4, 'character': 'Anne Hartela', 'credit_id': '52fe4ac89251416c750edd29', 'gender': 1, 'id': 85386, 'name': 'Irina Björklund', 'order': 3, 'profile_path': '/kfPRkNXKhcD3ufp0XMrjkQONFoT.jpg'}, {'cast_id': 5, 'character': 'Aleksi Partio', 'credit_id': '52fe4ac89251416c750edd2d', 'gender': 2, 'id': 124285, 'name': 'Hannu-Pekka Björkman', 'order': 4, 'profile_path': '/6gHHzIqSGCQdoaWy85ZKDjxDpRz.jpg'}, {'cast_id': 6, 'character': 'Laura Koskimies', 'credit_id': '52fe4ac89251416c750edd31', 'gender': 0, 'id': 108472, 'name': 'Jenni Banerjee', 'order': 5, 'profile_path': '/ysFIB6ejEAhp6VwpTbNKGSldzBb.jpg'}, {'cast_id': 7, 'character': 'Juha Pasanen', 'credit_id': '52fe4ac89251416c750edd35', 'gender': 2, 'id': 108476, 'name': 'Mikko Leppilampi', 'order': 6, 'profile_path': '/jkeUShOz01YAkmRO0AmB9v9qlfB.jpg'}, {'cast_id': 8, 'character': 'Hanna Kajaste', 'credit_id': '52fe4ac89251416c750edd39', 'gender': 0, 'id': 108473, 'name': 'Lena Meriläinen', 'order': 7, 'profile_path': None}, {'cast_id': 9, 'character': 'Mari Koski', 'credit_id': '52fe4ac89251416c750edd3d', 'gender': 1, 'id': 108475, 'name': 'Mari Perankoski', 'order': 8, 'profile_path': '/cxwu9zwvaqlRDKOhv0ZL0T3auFF.jpg'}, {'cast_id': 10, 'character': 'Risto Vierikko', 'credit_id': '52fe4ac89251416c750edd41', 'gender': 0, 'id': 89512, 'name': 'Risto Kaskilahti', 'order': 
9, 'profile_path': None}] [{'credit_id': '52fe4ac89251416c750edd47', 'department': 'Directing', 'gender': 2, 'id': 108481, 'job': 'Director', 'name': 'JP Siili', 'profile_path': None}, {'credit_id': '52fe4ac89251416c750edd4d', 'department': 'Writing', 'gender': 2, 'id': 108481, 'job': 'Writer', 'name': 'JP Siili', 'profile_path': None}] 141971 +20842 [{'cast_id': 2, 'character': 'Sultan Amar', 'credit_id': '52fe4962c3a368484e1289e3', 'gender': 2, 'id': 2983, 'name': 'Mark Strong', 'order': 0, 'profile_path': '/tr8nXYhY1ZWGmsXuHFHk6MBeL9Q.jpg'}, {'cast_id': 3, 'character': 'Emir Nesib', 'credit_id': '52fe4962c3a368484e1289e7', 'gender': 2, 'id': 3131, 'name': 'Antonio Banderas', 'order': 1, 'profile_path': '/85197jARsr06xQ84NhP9YoBL3sR.jpg'}, {'cast_id': 4, 'character': 'Leyla', 'credit_id': '52fe4962c3a368484e1289eb', 'gender': 1, 'id': 76792, 'name': 'Freida Pinto', 'order': 2, 'profile_path': '/hz12Lob7MclGLzMIX5rTUFhmDCW.jpg'}, {'cast_id': 5, 'character': 'Auda', 'credit_id': '52fe4962c3a368484e1289ef', 'gender': 2, 'id': 81051, 'name': 'Tahar Rahim', 'order': 3, 'profile_path': '/iqXBJXM6XP3E3nivEJiyfUK7mot.jpg'}, {'cast_id': 6, 'character': 'Ali', 'credit_id': '52fe4962c3a368484e1289f3', 'gender': 2, 'id': 53240, 'name': 'Riz Ahmed', 'order': 4, 'profile_path': '/ctzvoIBaV2OBIK7xl6gx2wM3lPi.jpg'}, {'cast_id': 7, 'character': 'Sheikh of Bani Sirri', 'credit_id': '52fe4962c3a368484e1289f7', 'gender': 2, 'id': 931638, 'name': 'Lotfi Dziri', 'order': 5, 'profile_path': '/sHdhzm7qW0EdCLfEJWITipT5Wp.jpg'}, {'cast_id': 52, 'character': 'Aicha', 'credit_id': '570f6f1392514143da000de3', 'gender': 0, 'id': 111662, 'name': 'Liya Kebede', 'order': 6, 'profile_path': '/y1qgV6iP5JOa6ZsGfakKUa0rQMZ.jpg'}, {'cast_id': 53, 'character': 'Thurkettle', 'credit_id': '570f6f2dc3a3685385002b4a', 'gender': 0, 'id': 17199, 'name': 'Corey Johnson', 'order': 7, 'profile_path': '/8jS7kzs01qMOHIltiDI2TQckmgs.jpg'}, {'cast_id': 54, 'character': 'Saleh', 'credit_id': 
'570f6f47c3a3685385002b4e', 'gender': 0, 'id': 1261092, 'name': 'Akin Gazi', 'order': 8, 'profile_path': '/nCSIiFVWWSkON9b5zmrC4RvwmWI.jpg'}, {'cast_id': 55, 'character': 'Hassan Dakhil', 'credit_id': '570f6f6992514120e80002d3', 'gender': 2, 'id': 25078, 'name': 'Eriq Ebouaney', 'order': 9, 'profile_path': '/1tyJreHjH0oVt2innMAWbHNMl40.jpg'}] [{'credit_id': '52fe4962c3a368484e1289fd', 'department': 'Writing', 'gender': 2, 'id': 736, 'job': 'Screenplay', 'name': 'Menno Meyjes', 'profile_path': None}, {'credit_id': '571c2337925141715d005fd9', 'department': 'Sound', 'gender': 2, 'id': 1729, 'job': 'Original Music Composer', 'name': 'James Horner', 'profile_path': '/oLOtXxXsYk8X4qq0ud4xVypXudi.jpg'}, {'credit_id': '52fe4962c3a368484e1289df', 'department': 'Directing', 'gender': 2, 'id': 2352, 'job': 'Director', 'name': 'Jean-Jacques Annaud', 'profile_path': '/sxNr4V5xESbnn6To1etsLoPj5SC.jpg'}, {'credit_id': '570f69da9251416a560007cc', 'department': 'Writing', 'gender': 2, 'id': 2352, 'job': 'Adaptation', 'name': 'Jean-Jacques Annaud', 'profile_path': '/sxNr4V5xESbnn6To1etsLoPj5SC.jpg'}, {'credit_id': '570f69ec92514133f3000fe6', 'department': 'Writing', 'gender': 0, 'id': 2358, 'job': 'Adaptation', 'name': 'Alain Godard', 'profile_path': None}, {'credit_id': '570f6a8ac3a36860a8000c52', 'department': 'Editing', 'gender': 2, 'id': 2425, 'job': 'Editor', 'name': 'Hervé Schneid', 'profile_path': None}, {'credit_id': '570f6b2bc3a36853730026b8', 'department': 'Sound', 'gender': 0, 'id': 6883, 'job': 'Music Editor', 'name': 'Dick Bernstein', 'profile_path': None}, {'credit_id': '570f6d9292514102b90028b8', 'department': 'Sound', 'gender': 0, 'id': 40814, 'job': 'ADR & Dubbing', 'name': 'Lee Herrick', 'profile_path': None}, {'credit_id': '570f6a1792514120e8000211', 'department': 'Production', 'gender': 2, 'id': 16787, 'job': 'Producer', 'name': 'Tarak Ben Ammar', 'profile_path': None}, {'credit_id': '570f6dc1c3a3685373002726', 'department': 'Sound', 'gender': 0, 'id': 17428, 
'job': 'Foley', 'name': 'Nicolas Becker', 'profile_path': None}, {'credit_id': '570f6a5f92514102b90027c2', 'department': 'Camera', 'gender': 0, 'id': 20524, 'job': 'Director of Photography', 'name': 'Jean-Marie Dreujou', 'profile_path': None}, {'credit_id': '570f6a9a9251416a560007ee', 'department': 'Art', 'gender': 0, 'id': 20525, 'job': 'Production Design', 'name': 'Pierre Queffelean', 'profile_path': None}, {'credit_id': '570f6e0bc3a36860a8000ccb', 'department': 'Sound', 'gender': 0, 'id': 24006, 'job': 'Sound Re-Recording Mixer', 'name': 'Vincent Arnardi', 'profile_path': None}, {'credit_id': '570f6e24925141016f001994', 'department': 'Sound', 'gender': 0, 'id': 125895, 'job': 'Sound Re-Recording Mixer', 'name': 'Julien Perez', 'profile_path': None}, {'credit_id': '570f6ab9c3a368538b002a53', 'department': 'Art', 'gender': 0, 'id': 1069458, 'job': 'Set Decoration', 'name': 'Fabienne Guillot', 'profile_path': None}, {'credit_id': '570f6e979251416bff0008a8', 'department': 'Editing', 'gender': 0, 'id': 1274261, 'job': 'First Assistant Editor', 'name': 'Anita Roth', 'profile_path': None}, {'credit_id': '570f6c55c3a368538b002a92', 'department': 'Crew', 'gender': 0, 'id': 1337035, 'job': 'Makeup Effects', 'name': 'Amélie Grossier', 'profile_path': None}, {'credit_id': '570f6e61c3a36853780028dd', 'department': 'Sound', 'gender': 0, 'id': 1351723, 'job': 'Supervising Sound Effects Editor', 'name': 'Frédéric Le Louet', 'profile_path': None}, {'credit_id': '570f6c3092514143da000d8c', 'department': 'Crew', 'gender': 0, 'id': 1367803, 'job': 'Makeup Effects', 'name': 'Christophe Chabenet', 'profile_path': None}, {'credit_id': '570f6b57c3a368503c000fca', 'department': 'Crew', 'gender': 0, 'id': 1367808, 'job': 'Armorer', 'name': 'Stéphane Linet', 'profile_path': None}, {'credit_id': '570f6bd19251416a56000814', 'department': 'Camera', 'gender': 0, 'id': 1371220, 'job': 'Steadicam Operator', 'name': 'Jan Rubens', 'profile_path': None}, {'credit_id': '570f6be1c3a36853730026d6', 
'department': 'Camera', 'gender': 0, 'id': 1374471, 'job': 'Still Photographer', 'name': 'David Koskas', 'profile_path': None}, {'credit_id': '570f6aa99251416bff000814', 'department': 'Art', 'gender': 1, 'id': 1393782, 'job': 'Art Direction', 'name': 'Emma Pucci', 'profile_path': None}, {'credit_id': '570f6b3cc3a368538b002a67', 'department': 'Sound', 'gender': 2, 'id': 1399326, 'job': 'Music Editor', 'name': 'Jim Henrikson', 'profile_path': None}, {'credit_id': '570f6e4ac3a368538b002adf', 'department': 'Sound', 'gender': 0, 'id': 1402247, 'job': 'Supervising Sound Editor', 'name': 'Selim Azzazi', 'profile_path': None}, {'credit_id': '570f6cbf92514143da000d9e', 'department': 'Crew', 'gender': 0, 'id': 1414551, 'job': 'Visual Effects Editor', 'name': 'David Gourmaud', 'profile_path': None}, {'credit_id': '570f6d7192514120e8000285', 'department': 'Sound', 'gender': 0, 'id': 1421266, 'job': 'ADR & Dubbing', 'name': 'Jennie Evans', 'profile_path': None}, {'credit_id': '570f6da5c3a36860a8000cb3', 'department': 'Editing', 'gender': 0, 'id': 1449985, 'job': 'Dialogue Editor', 'name': 'Capucine Courau', 'profile_path': None}, {'credit_id': '570f6ac9925141016f001911', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1500859, 'job': 'Costume Design', 'name': 'Sarah Ellis', 'profile_path': None}, {'credit_id': '570f6c14c3a3684ff1000f6c', 'department': 'Crew', 'gender': 0, 'id': 1556128, 'job': 'CG Supervisor', 'name': 'Benoit De Longlee', 'profile_path': None}, {'credit_id': '570f6c94c3a36853730026f9', 'department': 'Visual Effects', 'gender': 0, 'id': 1580667, 'job': 'Special Effects Supervisor', 'name': 'Uli Nefzer', 'profile_path': None}, {'credit_id': '570f69fb9251416a560007d3', 'department': 'Writing', 'gender': 2, 'id': 1605587, 'job': 'Novel', 'name': 'Hans Ruesch', 'profile_path': None}, {'credit_id': '570f6ae6c3a368503c000fb6', 'department': 'Art', 'gender': 0, 'id': 1605588, 'job': 'Assistant Art Director', 'name': 'Lilith Bekmezian', 'profile_path': None}, 
{'credit_id': '570f6af6925141016f00191e', 'department': 'Art', 'gender': 0, 'id': 1605589, 'job': 'Assistant Art Director', 'name': 'Thierry Poulet', 'profile_path': None}, {'credit_id': '570f6b16c3a36853730026b1', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1605590, 'job': 'Assistant Costume Designer', 'name': 'Céline Pelé', 'profile_path': None}, {'credit_id': '570f6b6bc3a3684ff1000f47', 'department': 'Directing', 'gender': 0, 'id': 1605592, 'job': 'Script Supervisor', 'name': 'Natasha Gomes de Almeida', 'profile_path': None}, {'credit_id': '570f6b92c3a3684ff1000f51', 'department': 'Crew', 'gender': 0, 'id': 1605593, 'job': 'Second Unit Cinematographer', 'name': 'Susana Gomes', 'profile_path': None}, {'credit_id': '570f6ba992514120e8000241', 'department': 'Camera', 'gender': 0, 'id': 1605594, 'job': 'First Assistant Camera', 'name': 'Denis Garnier', 'profile_path': None}, {'credit_id': '570f6c769251416bff000853', 'department': 'Crew', 'gender': 0, 'id': 1605595, 'job': 'Special Effects Coordinator', 'name': 'Absi Anis', 'profile_path': None}, {'credit_id': '570f6cdcc3a368503c000ffe', 'department': 'Crew', 'gender': 0, 'id': 1605596, 'job': 'Visual Effects Editor', 'name': 'Laurent Ripoll', 'profile_path': None}, {'credit_id': '570f6d0292514102b90028a1', 'department': 'Visual Effects', 'gender': 0, 'id': 1605598, 'job': 'VFX Supervisor', 'name': 'Eric Carme', 'profile_path': None}, {'credit_id': '570f6d43c3a36860a8000ca3', 'department': 'Visual Effects', 'gender': 0, 'id': 1605599, 'job': 'Visual Effects Coordinator', 'name': 'Virginie Wintrebert', 'profile_path': None}, {'credit_id': '570f6dd2c3a36860a8000cbf', 'department': 'Sound', 'gender': 0, 'id': 1605600, 'job': 'Foley', 'name': 'Nicolas Fioraso', 'profile_path': None}, {'credit_id': '570f6defc3a368538b002acb', 'department': 'Sound', 'gender': 0, 'id': 1605601, 'job': 'Sound Effects Editor', 'name': 'Cedric Denooz', 'profile_path': None}, {'credit_id': '570f6e8292514120e80002b7', 'department': 
'Editing', 'gender': 0, 'id': 1605605, 'job': 'Assistant Editor', 'name': 'Sarah Zaanoun', 'profile_path': None}] 77221 +20898 [{'cast_id': 1, 'character': 'Liz Hamilton', 'credit_id': '52fe4ac8c3a36847f81e1423', 'gender': 1, 'id': 14061, 'name': 'Jacqueline Bisset', 'order': 0, 'profile_path': '/ys2YfG4d26jUiGvok0jplWgUhzP.jpg'}, {'cast_id': 2, 'character': 'Merry Noel Blake', 'credit_id': '52fe4ac8c3a36847f81e1427', 'gender': 1, 'id': 11850, 'name': 'Candice Bergen', 'order': 1, 'profile_path': '/wzRihM7eZyCyAuKpgyBewCzDRWJ.jpg'}, {'cast_id': 3, 'character': "Douglas 'Doug' Blake", 'credit_id': '52fe4ac8c3a36847f81e142b', 'gender': 2, 'id': 56930, 'name': 'David Selby', 'order': 2, 'profile_path': '/31FZf0TdcYm53S9R9q1KWNNxVaH.jpg'}, {'cast_id': 5, 'character': 'Jules Levi', 'credit_id': '52fe4ac8c3a36847f81e142f', 'gender': 0, 'id': 21521, 'name': 'Steven Hill', 'order': 3, 'profile_path': '/bFmYHIgtlvGkAZirHVfNL1C0Ry7.jpg'}, {'cast_id': 6, 'character': 'Debby Blake, 18 Years', 'credit_id': '52fe4ac8c3a36847f81e1433', 'gender': 1, 'id': 5344, 'name': 'Meg Ryan', 'order': 4, 'profile_path': '/iv33eEcSakPCkO2MiR4bIZpjgyg.jpg'}, {'cast_id': 7, 'character': 'Jim, the Boy', 'credit_id': '52fe4ac8c3a36847f81e1437', 'gender': 0, 'id': 22490, 'name': 'Matt Lattanzi', 'order': 5, 'profile_path': '/ewiB9BXaynLLibBXIrL56DTzOpL.jpg'}, {'cast_id': 8, 'character': 'Ginger Trinidad', 'credit_id': '52fe4ac8c3a36847f81e143b', 'gender': 2, 'id': 57598, 'name': 'Daniel Faraldo', 'order': 6, 'profile_path': '/sbGm3pgHEVTEuSUFStsDG0uVGW5.jpg'}, {'cast_id': 9, 'character': 'Debby Blake, 8 Years', 'credit_id': '52fe4ac8c3a36847f81e143f', 'gender': 1, 'id': 44711, 'name': 'Nicole Eggert', 'order': 7, 'profile_path': '/xkpPODDLjrwcNNaDcWcbnwwPWJs.jpg'}, {'cast_id': 11, 'character': "Christopher 'Chris' Adams", 'credit_id': '52fe4ac8c3a36847f81e1449', 'gender': 2, 'id': 7678, 'name': 'Hart Bochner', 'order': 8, 'profile_path': '/2gCrMv3yJlFQxloacxjTAcXGEAz.jpg'}] [{'credit_id': 
'52fe4ac8c3a36847f81e1445', 'department': 'Directing', 'gender': 0, 'id': 14674, 'job': 'Director', 'name': 'George Cukor', 'profile_path': '/s6OKrb7REayEl3NHAQVJXvD9KrX.jpg'}, {'credit_id': '52fe4ac8c3a36847f81e1455', 'department': 'Writing', 'gender': 2, 'id': 20599, 'job': 'Theatre Play', 'name': 'John Van Druten', 'profile_path': None}, {'credit_id': '52fe4ac8c3a36847f81e144f', 'department': 'Writing', 'gender': 2, 'id': 77489, 'job': 'Screenplay', 'name': 'Gerald Ayres', 'profile_path': None}] 109962 +21115 [{'cast_id': 3, 'character': 'Himself', 'credit_id': '52fe48e09251416c9109b351', 'gender': 2, 'id': 1229, 'name': 'Jeff Bridges', 'order': 0, 'profile_path': '/xms1RAY6q7Lzp7wNeRCB0kzhucn.jpg'}, {'cast_id': 4, 'character': 'Himself', 'credit_id': '52fe48e09251416c9109b355', 'gender': 2, 'id': 219917, 'name': 'Tom Colicchio', 'order': 1, 'profile_path': '/dVckHCDbhuN3r8K324WdFP6m5G5.jpg'}, {'cast_id': 5, 'character': 'Herself', 'credit_id': '52fe48e09251416c9109b359', 'gender': 0, 'id': 1155683, 'name': 'Mariana Chilton', 'order': 2, 'profile_path': None}, {'cast_id': 6, 'character': 'Himself', 'credit_id': '52fe48e09251416c9109b35d', 'gender': 0, 'id': 181789, 'name': 'Ken Cook', 'order': 3, 'profile_path': None}, {'cast_id': 7, 'character': 'Herself', 'credit_id': '52fe48e09251416c9109b361', 'gender': 0, 'id': 1155684, 'name': 'Barbie Izquierdo', 'order': 4, 'profile_path': None}, {'cast_id': 8, 'character': 'Himself', 'credit_id': '52fe48e09251416c9109b365', 'gender': 0, 'id': 1155685, 'name': 'James McGovern', 'order': 5, 'profile_path': None}, {'cast_id': 9, 'character': 'Herself', 'credit_id': '52fe48e09251416c9109b369', 'gender': 0, 'id': 1018909, 'name': 'Marion Nestle', 'order': 6, 'profile_path': None}, {'cast_id': 10, 'character': 'Himself', 'credit_id': '52fe48e09251416c9109b36d', 'gender': 0, 'id': 1155686, 'name': 'Raj Patel', 'order': 7, 'profile_path': None}, {'cast_id': 11, 'character': 'Herself', 'credit_id': '52fe48e09251416c9109b371', 
'gender': 0, 'id': 1155687, 'name': 'Janet Poppendieck', 'order': 8, 'profile_path': None}] [{'credit_id': '52fe48e09251416c9109b347', 'department': 'Directing', 'gender': 1, 'id': 638550, 'job': 'Director', 'name': 'Kristi Jacobson', 'profile_path': '/7vM3pi4cnULXKtbV02U2DLfBDOx.jpg'}, {'credit_id': '52fe48e09251416c9109b34d', 'department': 'Directing', 'gender': 0, 'id': 930628, 'job': 'Director', 'name': 'Lori Silverbush', 'profile_path': '/rpA4olYv788AmQJHDxIgkE1PLfk.jpg'}] 84198 +21164 [] [{'credit_id': '52fe4c10c3a36847f821fbdd', 'department': 'Writing', 'gender': 2, 'id': 6210, 'job': 'Author', 'name': 'William Shakespeare', 'profile_path': '/2z4njqosEQM4g26ttb9grG9rJUg.jpg'}, {'credit_id': '52fe4c10c3a36847f821fbe3', 'department': 'Directing', 'gender': 2, 'id': 158839, 'job': 'Director', 'name': 'John Gorrie', 'profile_path': None}] 119916 +21853 [{'cast_id': 5, 'character': 'Robin Wright', 'credit_id': '52fe4b2c9251416c910d2557', 'gender': 1, 'id': 32, 'name': 'Robin Wright', 'order': 0, 'profile_path': '/tXfQTgcIEPP7gtVdJ44ZxZPhacn.jpg'}, {'cast_id': 7, 'character': 'Al', 'credit_id': '52fe4b2c9251416c910d255f', 'gender': 2, 'id': 1037, 'name': 'Harvey Keitel', 'order': 1, 'profile_path': '/4IcHhp1SCKijRxb7kqnxZNJuKdn.jpg'}, {'cast_id': 15, 'character': 'Dylan Truliner (voice)', 'credit_id': '52fe4b2d9251416c910d257f', 'gender': 2, 'id': 65717, 'name': 'Jon Hamm', 'order': 2, 'profile_path': '/wb14AA6u8dqA0XNweRIeKXatzzc.jpg'}, {'cast_id': 6, 'character': 'Dr Barker', 'credit_id': '52fe4b2c9251416c910d255b', 'gender': 2, 'id': 13242, 'name': 'Paul Giamatti', 'order': 3, 'profile_path': '/rX4LRmkYshMRfQ6lVbeZVAfqVKI.jpg'}, {'cast_id': 9, 'character': 'Jeff Green', 'credit_id': '52fe4b2c9251416c910d2567', 'gender': 2, 'id': 6413, 'name': 'Danny Huston', 'order': 4, 'profile_path': '/jc1eGtCShQ2ZkzqWApiWbA1lbTF.jpg'}, {'cast_id': 8, 'character': 'Aaron Wright', 'credit_id': '52fe4b2c9251416c910d2563', 'gender': 2, 'id': 113505, 'name': 'Kodi Smit-McPhee', 
'order': 5, 'profile_path': '/vJLSwYtJH8NHEUDh508XxewnrjD.jpg'}, {'cast_id': 11, 'character': 'Sarah Wright', 'credit_id': '52fe4b2c9251416c910d256f', 'gender': 1, 'id': 506085, 'name': 'Sami Gayle', 'order': 6, 'profile_path': '/k3TMcxxdkNq5PNWA6ruahlGmmZY.jpg'}, {'cast_id': 13, 'character': 'Steve', 'credit_id': '52fe4b2d9251416c910d2577', 'gender': 2, 'id': 51991, 'name': 'Michael Stahl-David', 'order': 7, 'profile_path': '/vIMKz8ESDE5Ti80GsYtR9zxCeiC.jpg'}, {'cast_id': 33, 'character': 'Bobs', 'credit_id': '57e44a5392514112370085c8', 'gender': 2, 'id': 79025, 'name': 'Don McManus', 'order': 8, 'profile_path': '/8JOjJkZBQmhVb6n8yHw70LIgC7i.jpg'}, {'cast_id': 16, 'character': 'Man in Zeppelin', 'credit_id': '54957258c3a3686ae10041dd', 'gender': 2, 'id': 35431, 'name': 'Jörg Vincent Malotki', 'order': 9, 'profile_path': '/ufEb96rjRGV1xfqX7OmO6O79BoA.jpg'}] [{'credit_id': '55c0d60e9251410f19001cec', 'department': 'Production', 'gender': 1, 'id': 32, 'job': 'Producer', 'name': 'Robin Wright', 'profile_path': '/tXfQTgcIEPP7gtVdJ44ZxZPhacn.jpg'}, {'credit_id': '55c0d66f9251410f27001e31', 'department': 'Production', 'gender': 1, 'id': 3965, 'job': 'Casting', 'name': 'Deborah Aquila', 'profile_path': '/7OBiqW30sXcW4f2xMds53L4JBN5.jpg'}, {'credit_id': '5823222f9251410cac00bf74', 'department': 'Writing', 'gender': 0, 'id': 8453, 'job': 'Novel', 'name': 'Stanisław Lem', 'profile_path': '/kk2RRgH2lkvvtknnXOitJW0jcV2.jpg'}, {'credit_id': '55c0d56a9251410ee7001bfe', 'department': 'Production', 'gender': 0, 'id': 46259, 'job': 'Producer', 'name': 'Reinhard Brundig', 'profile_path': None}, {'credit_id': '55c0d62e9251410f58001ca9', 'department': 'Sound', 'gender': 2, 'id': 46266, 'job': 'Music', 'name': 'Max Richter', 'profile_path': '/keYPSx2YlyRCk8KY7YaZ81fZ1Y3.jpg'}, {'credit_id': '55c0d642c3a36822fd001b29', 'department': 'Crew', 'gender': 0, 'id': 53617, 'job': 'Cinematography', 'name': 'Michal Englert', 'profile_path': None}, {'credit_id': '52fe4b2c9251416c910d2547', 
'department': 'Writing', 'gender': 2, 'id': 56198, 'job': 'Screenplay', 'name': 'Ari Folman', 'profile_path': '/xwCWMkK6mm6xSohXAXLyLVTddnO.jpg'}, {'credit_id': '52fe4b2c9251416c910d2541', 'department': 'Directing', 'gender': 2, 'id': 56198, 'job': 'Director', 'name': 'Ari Folman', 'profile_path': '/xwCWMkK6mm6xSohXAXLyLVTddnO.jpg'}, {'credit_id': '55c0d658c3a36822fd001b2b', 'department': 'Editing', 'gender': 0, 'id': 72574, 'job': 'Editor', 'name': 'Nili Feller', 'profile_path': None}, {'credit_id': '55c0d57e9251410ee7001c04', 'department': 'Production', 'gender': 2, 'id': 100748, 'job': 'Producer', 'name': 'Sébastien Delloye', 'profile_path': None}, {'credit_id': '55c0d6c3c3a36823f7001f90', 'department': 'Art', 'gender': 0, 'id': 124806, 'job': 'Production Design', 'name': 'David Polonsky', 'profile_path': None}, {'credit_id': '55c0d591c3a3682253001b86', 'department': 'Production', 'gender': 0, 'id': 936755, 'job': 'Producer', 'name': 'Piotr Dzieciol', 'profile_path': None}, {'credit_id': '55c0d5b6c3a3682379001e3c', 'department': 'Production', 'gender': 0, 'id': 979225, 'job': 'Producer', 'name': 'Ewa Puszczynska', 'profile_path': None}, {'credit_id': '55c0d6ac9251410f19001cf7', 'department': 'Production', 'gender': 1, 'id': 1034748, 'job': 'Casting', 'name': 'Tricia Wood', 'profile_path': None}, {'credit_id': '52fe4b2c9251416c910d254d', 'department': 'Production', 'gender': 0, 'id': 1133362, 'job': 'Producer', 'name': 'David Grumbach', 'profile_path': None}, {'credit_id': '52fe4b2c9251416c910d2553', 'department': 'Production', 'gender': 0, 'id': 1133363, 'job': 'Producer', 'name': 'Eitan Mansuri', 'profile_path': None}, {'credit_id': '55c0d6db9251410d7b001ad8', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1322407, 'job': 'Costume Design', 'name': 'Mandi Line', 'profile_path': None}, {'credit_id': '55c0d6849251410d7b001acf', 'department': 'Production', 'gender': 0, 'id': 1493161, 'job': 'Casting', 'name': 'Erin Toner', 'profile_path': None}, 
{'credit_id': '55c0d70fc3a36823f7001f97', 'department': 'Visual Effects', 'gender': 0, 'id': 1493546, 'job': 'Animation Director', 'name': 'Yoni Goodman', 'profile_path': None}, {'credit_id': '55c0d733c3a3682379001e7b', 'department': 'Visual Effects', 'gender': 0, 'id': 1493547, 'job': 'Animation Supervisor', 'name': 'Sefi Gayego', 'profile_path': None}, {'credit_id': '5718b0809251417a220094c0', 'department': 'Directing', 'gender': 0, 'id': 1608656, 'job': 'Script Supervisor', 'name': 'Barbara Krieger', 'profile_path': None}] 152795 +22150 [{'cast_id': 1, 'character': 'Lin', 'credit_id': '52fe477c9251416c7509b273', 'gender': 1, 'id': 133386, 'name': 'Sabrina Gennarino', 'order': 0, 'profile_path': '/j2BOZDTXKIq49txsx92vCF5sCxg.jpg'}, {'cast_id': 2, 'character': 'Chad', 'credit_id': '52fe477c9251416c7509b277', 'gender': 2, 'id': 133387, 'name': 'Tom Eplin', 'order': 1, 'profile_path': None}] [{'credit_id': '52fe477c9251416c7509b27d', 'department': 'Directing', 'gender': 2, 'id': 105976, 'job': 'Director', 'name': 'Jake Kennedy', 'profile_path': None}, {'credit_id': '540aa7fa0e0a262b43001baa', 'department': 'Writing', 'gender': 2, 'id': 105976, 'job': 'Writer', 'name': 'Jake Kennedy', 'profile_path': None}] 18440 +23043 [{'cast_id': 1001, 'character': 'Tykke', 'credit_id': '52fe44cec3a368484e037f01', 'gender': 2, 'id': 15086, 'name': 'Nicolas Bro', 'order': 0, 'profile_path': '/bPfMDMjbFZzelnZQfLzVTk6qlwi.jpg'}, {'cast_id': 1002, 'character': 'Jimmy', 'credit_id': '52fe44cec3a368484e037f05', 'gender': 2, 'id': 93236, 'name': 'David Dencik', 'order': 1, 'profile_path': '/h6ndLvEolmMOQs2hhbSEZMomSUu.jpg'}, {'cast_id': 1003, 'character': 'Ebbe', 'credit_id': '52fe44cec3a368484e037f09', 'gender': 0, 'id': 234401, 'name': 'Claus Flygare', 'order': 2, 'profile_path': None}, {'cast_id': 1004, 'character': 'Laust', 'credit_id': '52fe44cec3a368484e037f0d', 'gender': 0, 'id': 234402, 'name': 'Michael Grønnemose', 'order': 3, 'profile_path': None}, {'cast_id': 1005, 
'character': 'Mor', 'credit_id': '52fe44cec3a368484e037f11', 'gender': 1, 'id': 234403, 'name': 'Hanne Hedelund', 'order': 4, 'profile_path': '/wcFauJCRofUb2eaeMF5pX9oIunS.jpg'}, {'cast_id': 1008, 'character': 'Lars', 'credit_id': '52fe44cec3a368484e037f21', 'gender': 2, 'id': 20258, 'name': 'Thure Lindhardt', 'order': 5, 'profile_path': '/q1idmyiaxds2ElxwXOQac75wMAh.jpg'}, {'cast_id': 1009, 'character': 'Kim', 'credit_id': '59dc08d6c3a368620d05b4f5', 'gender': 2, 'id': 1460371, 'name': 'Mads Rømer Brolin-Tani', 'order': 6, 'profile_path': '/rVjW71rp5SRBMDt4Sck3Uqriydh.jpg'}] [{'credit_id': '52fe44cec3a368484e037efd', 'department': 'Directing', 'gender': 0, 'id': 234400, 'job': 'Director', 'name': 'Nicolo Donato', 'profile_path': None}, {'credit_id': '52fe44cec3a368484e037f17', 'department': 'Writing', 'gender': 0, 'id': 234400, 'job': 'Writer', 'name': 'Nicolo Donato', 'profile_path': None}, {'credit_id': '52fe44cec3a368484e037f1d', 'department': 'Writing', 'gender': 0, 'id': 1055743, 'job': 'Writer', 'name': 'Rasmus Birch', 'profile_path': None}] 25541 +24843 [{'cast_id': 13, 'character': 'Tommy Vinson', 'credit_id': '52fe43fb9251416c75025017', 'gender': 2, 'id': 16475, 'name': 'Burt Reynolds', 'order': 0, 'profile_path': '/g5AyPAhdr5OtHVRGLD3i8Gw4oas.jpg'}, {'cast_id': 14, 'character': 'Alex Stillman', 'credit_id': '52fe43fb9251416c7502501b', 'gender': 2, 'id': 68178, 'name': 'Bret Harrison', 'order': 1, 'profile_path': '/v5CP0ErEKJZAbkb49ZrR5HaD6IJ.jpg'}, {'cast_id': 15, 'character': 'Michelle', 'credit_id': '52fe43fb9251416c7502501f', 'gender': 1, 'id': 21596, 'name': 'Shannon Elizabeth', 'order': 2, 'profile_path': '/c7l0KsU2PAXXseofmSvsmIMQH1N.jpg'}, {'cast_id': 16, 'character': 'Helen Vinson', 'credit_id': '52fe43fb9251416c75025023', 'gender': 0, 'id': 68179, 'name': 'Maria Mason', 'order': 3, 'profile_path': None}, {'cast_id': 17, 'character': 'Karen "Razor" Jones', 'credit_id': '52fe43fb9251416c75025027', 'gender': 1, 'id': 7906, 'name': 'Jennifer Tilly', 
'order': 4, 'profile_path': '/bNB0BTnPAdAAHuCQSprMk4smBMD.jpg'}, {'cast_id': 18, 'character': 'Mr. Stillman', 'credit_id': '52fe43fb9251416c7502502b', 'gender': 2, 'id': 68180, 'name': 'Gary Grubbs', 'order': 5, 'profile_path': '/zbSSBFW8oRl4M1u2vyr1PUnWqCw.jpg'}, {'cast_id': 19, 'character': 'Charlie Adler', 'credit_id': '52fe43fb9251416c7502502f', 'gender': 2, 'id': 1466, 'name': 'Charles Durning', 'order': 6, 'profile_path': '/cDbNCqLStv13MW8W6DZ504xsbrz.jpg'}, {'cast_id': 21, 'character': 'Mrs. Stillman', 'credit_id': '5959332c9251410a59026f94', 'gender': 0, 'id': 938770, 'name': 'Caroline Mckinley', 'order': 7, 'profile_path': None}, {'cast_id': 22, 'character': 'Ben Thomas', 'credit_id': '5959336d925141222b00ba59', 'gender': 2, 'id': 1470874, 'name': 'Brandon Ray Olive', 'order': 8, 'profile_path': '/wS0Br9LiEMH1k40kOM2aVZ4oRHu.jpg'}, {'cast_id': 23, 'character': "Mike 'Double Diamond' Jackson", 'credit_id': '595933c39251410a59027024', 'gender': 0, 'id': 231857, 'name': 'Jon Eyez', 'order': 9, 'profile_path': '/1v11aZ5TmwtORs10OcU4wzhX2NW.jpg'}, {'cast_id': 24, 'character': 'Tex Button', 'credit_id': '595933e6c3a368265d026c23', 'gender': 2, 'id': 129868, 'name': 'J.D. 
Evermore', 'order': 10, 'profile_path': '/iNn9JV3Kk7VJ9ICYKdjgwFq8EpF.jpg'}] [{'credit_id': '52fe43fb9251416c75024ff5', 'department': 'Editing', 'gender': 2, 'id': 1592, 'job': 'Editor', 'name': 'Eric Strand', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024fe3', 'department': 'Sound', 'gender': 0, 'id': 63454, 'job': 'Music', 'name': 'Peter Rafelson', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75025035', 'department': 'Writing', 'gender': 2, 'id': 68167, 'job': 'Screenplay', 'name': 'Gil Cates Jr.', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024fd7', 'department': 'Directing', 'gender': 2, 'id': 68167, 'job': 'Director', 'name': 'Gil Cates Jr.', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024fdd', 'department': 'Writing', 'gender': 0, 'id': 68169, 'job': 'Screenplay', 'name': 'Marc Weinstock', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024fe9', 'department': 'Camera', 'gender': 2, 'id': 68170, 'job': 'Director of Photography', 'name': 'Tom Harting', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024fef', 'department': 'Editing', 'gender': 0, 'id': 68171, 'job': 'Editor', 'name': 'Jonathan Cates', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75024ffb', 'department': 'Art', 'gender': 0, 'id': 68172, 'job': 'Production Design', 'name': 'Frank Zito', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75025001', 'department': 'Production', 'gender': 0, 'id': 68173, 'job': 'Executive Producer', 'name': 'Michael Amato', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75025007', 'department': 'Production', 'gender': 2, 'id': 68174, 'job': 'Executive Producer', 'name': 'Scott Lazar', 'profile_path': None}, {'credit_id': '52fe43fb9251416c7502500d', 'department': 'Production', 'gender': 0, 'id': 68175, 'job': 'Producer', 'name': 'Albert J. 
Salzer', 'profile_path': None}, {'credit_id': '52fe43fb9251416c75025013', 'department': 'Production', 'gender': 0, 'id': 68177, 'job': 'Producer', 'name': 'Marc Weinstock', 'profile_path': None}] 11115 +25949 [{'cast_id': 3, 'character': 'Sam Smith', 'credit_id': '52fe4b9bc3a36847f820bbab', 'gender': 2, 'id': 104561, 'name': 'Craig Roberts', 'order': 0, 'profile_path': '/dO58JjBZUM82HqGTv7AMN0O2tJ6.jpg'}, {'cast_id': 4, 'character': 'Mary Bright', 'credit_id': '52fe4b9bc3a36847f820bbaf', 'gender': 1, 'id': 17606, 'name': 'Imogen Poots', 'order': 1, 'profile_path': '/jhirGwxoletj6wAzAttWJj3ra3H.jpg'}, {'cast_id': 5, 'character': 'Cameron', 'credit_id': '52fe4b9bc3a36847f820bbb3', 'gender': 2, 'id': 9013, 'name': 'Kevin McKidd', 'order': 2, 'profile_path': '/5MahWq6XoSuThdSLvVXUDa2LlkP.jpg'}, {'cast_id': 6, 'character': 'Charlie', 'credit_id': '52fe4b9bc3a36847f820bbb7', 'gender': 2, 'id': 9191, 'name': 'Timothy Spall', 'order': 3, 'profile_path': '/yMClxMrLdOXk37RGvrqJEWjg9ko.jpg'}] [{'credit_id': '52fe4b9bc3a36847f820bba1', 'department': 'Directing', 'gender': 0, 'id': 1064952, 'job': 'Director', 'name': 'Simon Aboud', 'profile_path': None}, {'credit_id': '52fe4b9bc3a36847f820bba7', 'department': 'Writing', 'gender': 0, 'id': 1064952, 'job': 'Writer', 'name': 'Simon Aboud', 'profile_path': None}] 116723 +25950 [{'cast_id': 12, 'character': 'The Creature', 'credit_id': '52fe4381c3a36847f8059117', 'gender': 2, 'id': 10843, 'name': 'Luke Goss', 'order': 0, 'profile_path': '/hlY2Jkn6wOJoo3F5h8AYaX5XHh9.jpg'}, {'cast_id': 13, 'character': 'Victor Frankenstein', 'credit_id': '52fe4381c3a36847f805911b', 'gender': 2, 'id': 13362, 'name': 'Alec Newman', 'order': 1, 'profile_path': '/qbSY0vUIyc8SUuLCw7hK5CsTpvQ.jpg'}, {'cast_id': 14, 'character': 'Caroline Frankenstein', 'credit_id': '52fe4381c3a36847f805911f', 'gender': 1, 'id': 1146, 'name': 'Julie Delpy', 'order': 2, 'profile_path': '/4LfbFCLGHHkHVikRdgxbN6hbatl.jpg'}, {'cast_id': 18, 'character': 'Prof. 
Waldmann', 'credit_id': '52fe4381c3a36847f805912f', 'gender': 2, 'id': 227, 'name': 'William Hurt', 'order': 3, 'profile_path': '/mf5GiYZjURQ72CPtY1kBva7mqIK.jpg'}, {'cast_id': 17, 'character': 'Capt. Walton', 'credit_id': '52fe4381c3a36847f805912b', 'gender': 2, 'id': 55636, 'name': 'Donald Sutherland', 'order': 4, 'profile_path': '/tPLVaPjxEscGPKS3ieByloa8Mqj.jpg'}, {'cast_id': 15, 'character': 'Elizabeth Frankenstein', 'credit_id': '52fe4381c3a36847f8059123', 'gender': 1, 'id': 29944, 'name': 'Nicole Lewis', 'order': 5, 'profile_path': None}, {'cast_id': 16, 'character': 'Justine', 'credit_id': '52fe4381c3a36847f8059127', 'gender': 0, 'id': 29945, 'name': 'Monika Hilmerová', 'order': 6, 'profile_path': '/wmUfxZ154G7stVFUaIWBXvXd6E1.jpg'}, {'cast_id': 19, 'character': 'Leutnant', 'credit_id': '52fe4381c3a36847f8059133', 'gender': 0, 'id': 29946, 'name': 'Tomas Mastalir', 'order': 7, 'profile_path': None}, {'cast_id': 46, 'character': 'Henry', 'credit_id': '58197471925141339c003e6a', 'gender': 2, 'id': 221018, 'name': 'Dan Stevens', 'order': 8, 'profile_path': '/jNiY649MK85UFMosJIDxJ9HgIsC.jpg'}] [{'credit_id': '52fe4380c3a36847f80590dd', 'department': 'Writing', 'gender': 1, 'id': 28970, 'job': 'Novel', 'name': 'Mary Shelley', 'profile_path': '/rcTyPNOKNFmZi61FVdAivtBroXt.jpg'}, {'credit_id': '52fe4380c3a36847f80590e3', 'department': 'Writing', 'gender': 2, 'id': 29936, 'job': 'Screenplay', 'name': 'Mark Kruger', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f80590e9', 'department': 'Production', 'gender': 0, 'id': 29937, 'job': 'Producer', 'name': 'James Wilberger', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f80590ef', 'department': 'Sound', 'gender': 2, 'id': 29938, 'job': 'Music', 'name': 'Roger Bellon', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f80590f5', 'department': 'Camera', 'gender': 2, 'id': 22057, 'job': 'Director of Photography', 'name': 'Alan Caso', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f80590fb', 
'department': 'Editing', 'gender': 1, 'id': 29939, 'job': 'Editor', 'name': 'Jennifer Jean Cacavas', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f8059101', 'department': 'Production', 'gender': 1, 'id': 29940, 'job': 'Casting', 'name': 'Gillian Hawser', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f8059107', 'department': 'Production', 'gender': 2, 'id': 29941, 'job': 'Casting', 'name': 'Matthew Lessall', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f805910d', 'department': 'Production', 'gender': 0, 'id': 29942, 'job': 'Casting', 'name': 'Lenka Stefankovicova', 'profile_path': None}, {'credit_id': '52fe4381c3a36847f8059139', 'department': 'Directing', 'gender': 2, 'id': 95329, 'job': 'Director', 'name': 'Kevin Connor', 'profile_path': '/ke8acwv2T9YfIIwPqh5zJg7aBh5.jpg'}, {'credit_id': '56533520c3a3686af6003b1f', 'department': 'Art', 'gender': 2, 'id': 29943, 'job': 'Production Design', 'name': 'Jonathan A. Carlson', 'profile_path': None}, {'credit_id': '565335339251417a7b003a14', 'department': 'Art', 'gender': 0, 'id': 1360847, 'job': 'Art Direction', 'name': 'Viera Dandová', 'profile_path': None}, {'credit_id': '565335449251417a6e00389b', 'department': 'Art', 'gender': 0, 'id': 1539789, 'job': 'Set Decoration', 'name': 'Stano Mozny', 'profile_path': None}, {'credit_id': '565335599251417a7100391d', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 12655, 'job': 'Costume Design', 'name': 'Barbara Lane', 'profile_path': None}, {'credit_id': '5653357b9251417a71003924', 'department': 'Art', 'gender': 0, 'id': 1539790, 'job': 'Assistant Art Director', 'name': 'Jeanina Vasilescu', 'profile_path': None}, {'credit_id': '565335969251417a6e0038a7', 'department': 'Art', 'gender': 0, 'id': 1301368, 'job': 'Construction Coordinator', 'name': 'Tomas Berka', 'profile_path': None}, {'credit_id': '565335d0c3a3686aef003a60', 'department': 'Visual Effects', 'gender': 0, 'id': 1201318, 'job': 'Special Effects Supervisor', 'name': 'Tissi Brandhofer', 
'profile_path': None}, {'credit_id': '565336099251417a6b003ab2', 'department': 'Visual Effects', 'gender': 0, 'id': 1398930, 'job': 'Visual Effects Coordinator', 'name': 'Vít Komrzý', 'profile_path': None}, {'credit_id': '5653361f9251417a69003989', 'department': 'Visual Effects', 'gender': 2, 'id': 1539796, 'job': 'Visual Effects Producer', 'name': 'Jan Vseticek', 'profile_path': None}, {'credit_id': '5653363f9251417a71003967', 'department': 'Visual Effects', 'gender': 0, 'id': 1417979, 'job': 'Visual Effects Supervisor', 'name': 'Viktor Muller', 'profile_path': None}, {'credit_id': '5653365d9251417a690039ab', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1539798, 'job': 'Costume Supervisor', 'name': 'Darina Suranová', 'profile_path': None}, {'credit_id': '5653367dc3a3686af3003a77', 'department': 'Crew', 'gender': 0, 'id': 1539799, 'job': 'Armorer', 'name': 'Juraj Ocenas', 'profile_path': None}, {'credit_id': '56533693c3a3686aed003a75', 'department': 'Directing', 'gender': 0, 'id': 1155056, 'job': 'Script Supervisor', 'name': 'Laura Siváková', 'profile_path': None}, {'credit_id': '565336a6c3a3686afa00370d', 'department': 'Directing', 'gender': 0, 'id': 1539800, 'job': 'Script Supervisor', 'name': 'Jana Sukenikova', 'profile_path': None}, {'credit_id': '565336d6c3a3686b00003ebf', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1539804, 'job': 'Key Hair Stylist', 'name': 'Stefania Zarecka', 'profile_path': None}, {'credit_id': '565336ee9251417a7d00395a', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1539805, 'job': 'Makeup Artist', 'name': 'Beatrix Dollingerova', 'profile_path': None}, {'credit_id': '56533724c3a3686aed003a8f', 'department': 'Crew', 'gender': 0, 'id': 96389, 'job': 'Second Unit Cinematographer', 'name': 'Miro Gábor', 'profile_path': None}, {'credit_id': '56533742c3a3686af3003aac', 'department': 'Camera', 'gender': 0, 'id': 1539807, 'job': 'Camera Operator', 'name': 'Peter Hyks', 'profile_path': None}, {'credit_id': 
'5653375ac3a3686b03003bc3', 'department': 'Camera', 'gender': 0, 'id': 1539808, 'job': 'Camera Operator', 'name': 'Tomas Juricek', 'profile_path': None}, {'credit_id': '565337779251417a710039a7', 'department': 'Camera', 'gender': 0, 'id': 1266989, 'job': 'Still Photographer', 'name': 'Ken Woroner', 'profile_path': None}, {'credit_id': '5653379fc3a3686af6003baf', 'department': 'Editing', 'gender': 0, 'id': 1398180, 'job': 'Dialogue Editor', 'name': 'Michael Wetherwax', 'profile_path': None}, {'credit_id': '565337c8c3a3686af3003ac3', 'department': 'Sound', 'gender': 0, 'id': 1398178, 'job': 'Foley', 'name': 'Eric Hoeschen', 'profile_path': None}, {'credit_id': '565337e7c3a3686b03003be6', 'department': 'Sound', 'gender': 0, 'id': 1049322, 'job': 'Sound Effects Editor', 'name': 'Paul J. Diller', 'profile_path': None}, {'credit_id': '56533836c3a3686b00003f00', 'department': 'Sound', 'gender': 2, 'id': 1539809, 'job': 'Sound Effects Editor', 'name': 'David Lynch', 'profile_path': None}, {'credit_id': '5653389c9251415dc5001ddc', 'department': 'Sound', 'gender': 0, 'id': 1539810, 'job': 'Sound Effects Editor', 'name': 'Kenneth Young', 'profile_path': None}] 3057 +25953 [{'cast_id': 1, 'character': 'Orry Main', 'credit_id': '52fe4ac6c3a368484e1664bb', 'gender': 2, 'id': 723, 'name': 'Patrick Swayze', 'order': 0, 'profile_path': '/3mpjuYiGfglDeaGjwFmSBBzwbc1.jpg'}, {'cast_id': 2, 'character': 'Elkanah Bent', 'credit_id': '52fe4ac6c3a368484e1664bf', 'gender': 0, 'id': 198886, 'name': 'Philip Casnoff', 'order': 1, 'profile_path': '/oGTYVKlW7RbW6nMuX5ErMmXZjei.jpg'}, {'cast_id': 3, 'character': 'Virgilia Hazard', 'credit_id': '52fe4ac6c3a368484e1664c3', 'gender': 1, 'id': 1796, 'name': 'Kirstie Alley', 'order': 2, 'profile_path': '/fQ1EOV3dazf05uhrEVqMN2S3T0U.jpg'}, {'cast_id': 4, 'character': 'Brett Main Hazard', 'credit_id': '52fe4ac6c3a368484e1664c7', 'gender': 0, 'id': 82345, 'name': 'Genie Francis', 'order': 3, 'profile_path': '/sdV3104UbZ9tudnJcUhNCy3ui48.jpg'}] 
[{'credit_id': '54ae8ef1c3a368070f000380', 'department': 'Directing', 'gender': 2, 'id': 95329, 'job': 'Director', 'name': 'Kevin Connor', 'profile_path': '/ke8acwv2T9YfIIwPqh5zJg7aBh5.jpg'}, {'credit_id': '54ae8f07c3a368328e000300', 'department': 'Writing', 'gender': 2, 'id': 151853, 'job': 'Writer', 'name': 'Douglas Heyes', 'profile_path': None}, {'credit_id': '54ae8efb925141152c0017d3', 'department': 'Writing', 'gender': 0, 'id': 1220670, 'job': 'Writer', 'name': 'Richard Fielder', 'profile_path': None}, {'credit_id': '54ae8f0f925141152c0017db', 'department': 'Writing', 'gender': 0, 'id': 1374922, 'job': 'Writer', 'name': 'John Jakes', 'profile_path': None}] 125458 +25954 [{'cast_id': 7, 'character': 'Hollander', 'credit_id': '52fe4da29251416c9111ce71', 'gender': 2, 'id': 2232, 'name': 'Michael Keaton', 'order': 0, 'profile_path': '/myVdrYNGTgqunLfUSaM8DuVD7DL.jpg'}, {'cast_id': 3, 'character': 'Sara', 'credit_id': '52fe4da29251416c9111ce61', 'gender': 1, 'id': 11705, 'name': 'Michelle Monaghan', 'order': 1, 'profile_path': '/10PSqSu9Jlfh5rWLbN3YCr8N0P5.jpg'}, {'cast_id': 4, 'character': 'Ryan', 'credit_id': '52fe4da29251416c9111ce65', 'gender': 2, 'id': 134673, 'name': 'Andrew W. 
Walker', 'order': 2, 'profile_path': '/tIXOvMadH1JI13DFOTnpAfrvQiV.jpg'}, {'cast_id': 5, 'character': 'Blake', 'credit_id': '52fe4da29251416c9111ce69', 'gender': 1, 'id': 70787, 'name': 'Kaniehtiio Horn', 'order': 3, 'profile_path': '/osp3WEMHeI2aBl3WoB1zllJyDTZ.jpg'}, {'cast_id': 8, 'character': 'Chad', 'credit_id': '52fe4da29251416c9111ce75', 'gender': 0, 'id': 183538, 'name': 'Barry Sloane', 'order': 4, 'profile_path': '/kZIPKGCKE47d92mrLN5DbWpsENe.jpg'}, {'cast_id': 16, 'character': 'Danny', 'credit_id': '55e7e47f9251413e380013c1', 'gender': 2, 'id': 82646, 'name': 'Trevor Hayes', 'order': 5, 'profile_path': '/gDTWopS3erov8bhvNohWZKryt3X.jpg'}, {'cast_id': 9, 'character': 'Antonio', 'credit_id': '52fe4da29251416c9111ce79', 'gender': 2, 'id': 23874, 'name': 'Phillip Jarrett', 'order': 6, 'profile_path': None}, {'cast_id': 10, 'character': 'le garçon flemmard', 'credit_id': '5443ac30c3a3683e0e004b7b', 'gender': 0, 'id': 1267117, 'name': 'Olivier Surprenant', 'order': 7, 'profile_path': None}, {'cast_id': 11, 'character': "un sergent de l'armée américaine", 'credit_id': '5443acd9c3a3683e01004b75', 'gender': 0, 'id': 1375561, 'name': 'Timothy Paul Coderre', 'order': 8, 'profile_path': '/z4NEC4bUo2zOyX2IwWRyYIGGWoS.jpg'}, {'cast_id': 12, 'character': 'la fille flemmarde', 'credit_id': '5443af29c3a3683e0e004c07', 'gender': 0, 'id': 1375566, 'name': 'Jasmine Chan', 'order': 9, 'profile_path': None}, {'cast_id': 13, 'character': 'la fille au parc', 'credit_id': '5443b0710e0a26632d004c4e', 'gender': 0, 'id': 1375567, 'name': 'Namukasa Basudde', 'order': 10, 'profile_path': None}, {'cast_id': 14, 'character': 'un passant', 'credit_id': '5443b0de0e0a26634d004c7d', 'gender': 0, 'id': 1375568, 'name': 'Zhaida Uddin', 'order': 11, 'profile_path': None}, {'cast_id': 15, 'character': 'Femme de Kabul en burka', 'credit_id': '5476327dc3a368069c0001de', 'gender': 0, 'id': 1389748, 'name': 'Lindsey Reeves-Whit', 'order': 12, 'profile_path': None}] [{'credit_id': 
'52fe4da29251416c9111ce5d', 'department': 'Writing', 'gender': 2, 'id': 2075, 'job': 'Screenplay', 'name': 'David Loughery', 'profile_path': None}, {'credit_id': '596bd537c3a3684c50004f81', 'department': 'Production', 'gender': 2, 'id': 2075, 'job': 'Producer', 'name': 'David Loughery', 'profile_path': None}, {'credit_id': '596bd76e9251413b6e004ccd', 'department': 'Editing', 'gender': 2, 'id': 5327, 'job': 'Editor', 'name': 'Andrew Mondshein', 'profile_path': None}, {'credit_id': '596bd6529251413b1800478d', 'department': 'Sound', 'gender': 2, 'id': 9989, 'job': 'Original Music Composer', 'name': 'Mark Mancina', 'profile_path': '/lt71bHajcyP9Nk7n18Nuq6NidA8.jpg'}, {'credit_id': '596bd6ea9251413b24004fdc', 'department': 'Camera', 'gender': 2, 'id': 34004, 'job': 'Director of Photography', 'name': 'Chris Seager', 'profile_path': None}, {'credit_id': '596bd7b9c3a3684c730050fb', 'department': 'Production', 'gender': 1, 'id': 46943, 'job': 'Casting', 'name': 'Kathleen Chopin', 'profile_path': None}, {'credit_id': '52fe4da29251416c9111ce57', 'department': 'Directing', 'gender': 2, 'id': 52629, 'job': 'Director', 'name': 'Joseph Ruben', 'profile_path': None}, {'credit_id': '596bd5a49251413b60004a75', 'department': 'Production', 'gender': 2, 'id': 52629, 'job': 'Producer', 'name': 'Joseph Ruben', 'profile_path': None}, {'credit_id': '596bd568c3a3684c02004dba', 'department': 'Production', 'gender': 2, 'id': 105835, 'job': 'Producer', 'name': 'Robert Menzies', 'profile_path': None}, {'credit_id': '596bd4fac3a3684c61004d2c', 'department': 'Production', 'gender': 0, 'id': 152074, 'job': 'Producer', 'name': 'Michael Baker', 'profile_path': None}, {'credit_id': '596bd5f8c3a3684c1000454c', 'department': 'Production', 'gender': 0, 'id': 585882, 'job': 'Producer', 'name': 'Jeff Sackman', 'profile_path': None}, {'credit_id': '596bd7eac3a3684bcb0048bf', 'department': 'Production', 'gender': 1, 'id': 979724, 'job': 'Casting', 'name': 'Ilona Smyth', 'profile_path': None}, {'credit_id': 
'596bde24c3a3684bcb004e1d', 'department': 'Crew', 'gender': 0, 'id': 1316787, 'job': 'Stunt Coordinator', 'name': 'Layton Morrison', 'profile_path': None}, {'credit_id': '596be0149251413b18004fac', 'department': 'Sound', 'gender': 0, 'id': 1376276, 'job': 'Supervising Music Editor', 'name': 'Mike Flicker', 'profile_path': None}, {'credit_id': '596be04ac3a3684c390054c5', 'department': 'Sound', 'gender': 0, 'id': 1376277, 'job': 'Music Editor', 'name': 'Matt Friedman', 'profile_path': None}, {'credit_id': '596bdf82c3a3684bcb004f25', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1413911, 'job': 'Costume Supervisor', 'name': 'Christina Cattle', 'profile_path': None}, {'credit_id': '596bd8179251413b57004cfe', 'department': 'Art', 'gender': 0, 'id': 1418415, 'job': 'Production Design', 'name': 'Lisa Soper', 'profile_path': None}, {'credit_id': '596bd871c3a3684c730051ae', 'department': 'Art', 'gender': 0, 'id': 1418416, 'job': 'Art Direction', 'name': 'Shane Boucher', 'profile_path': None}, {'credit_id': '596bd8dec3a3684bcb00499c', 'department': 'Art', 'gender': 0, 'id': 1418417, 'job': 'Set Decoration', 'name': 'Garren Dunbar', 'profile_path': None}, {'credit_id': '596bd97a9251413b60004e0d', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1418418, 'job': 'Key Hair Stylist', 'name': 'Kayla Manasseri', 'profile_path': None}, {'credit_id': '596bd9b09251413b60004e44', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1418419, 'job': 'Key Makeup Artist', 'name': 'Angie Mills', 'profile_path': None}, {'credit_id': '596bdae1c3a3684c0200531b', 'department': 'Art', 'gender': 0, 'id': 1418425, 'job': 'Construction Coordinator', 'name': 'William Mood', 'profile_path': None}, {'credit_id': '596bdb4a9251413b3d005268', 'department': 'Art', 'gender': 0, 'id': 1418427, 'job': 'Property Master', 'name': 'Tim Winchester', 'profile_path': None}, {'credit_id': '596be0cac3a3684c3900552e', 'department': 'Directing', 'gender': 1, 'id': 1435539, 'job': 'Script Supervisor', 
'name': 'Carolyn Arbuckle', 'profile_path': None}, {'credit_id': '596bd93dc3a3684c02005194', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1435907, 'job': 'Costume Design', 'name': 'Sue Fijalkowska', 'profile_path': None}, {'credit_id': '596bdbe59251413b57005081', 'department': 'Sound', 'gender': 2, 'id': 1519335, 'job': 'Sound Effects Editor', 'name': 'David G. Burns', 'profile_path': None}, {'credit_id': '596be099c3a3684c39005503', 'department': 'Sound', 'gender': 1, 'id': 1533532, 'job': 'Music Supervisor', 'name': 'Laura Katz', 'profile_path': None}, {'credit_id': '596bde60c3a3684c0200562c', 'department': 'Camera', 'gender': 0, 'id': 1553776, 'job': 'Still Photographer', 'name': 'Albert Camicioli', 'profile_path': None}, {'credit_id': '596bdf1bc3a3684c390053c3', 'department': 'Camera', 'gender': 0, 'id': 1582679, 'job': 'Steadicam Operator', 'name': 'James Sainthill', 'profile_path': None}, {'credit_id': '596be1029251413b200053e0', 'department': 'Production', 'gender': 0, 'id': 1659970, 'job': 'Production Coordinator', 'name': 'Sarah Deline', 'profile_path': None}, {'credit_id': '596bddcd9251413b6e00530c', 'department': 'Crew', 'gender': 0, 'id': 1703337, 'job': 'Visual Effects Editor', 'name': 'Michael J. Wechsler', 'profile_path': None}, {'credit_id': '596bda30c3a3684bcb004aa3', 'department': 'Directing', 'gender': 0, 'id': 1852795, 'job': 'First Assistant Director', 'name': 'Reid A. 
Dunlop', 'profile_path': None}, {'credit_id': '596bdaa29251413b2400538e', 'department': 'Art', 'gender': 0, 'id': 1852796, 'job': 'Construction Coordinator', 'name': 'Maciej Fijalkowski', 'profile_path': None}, {'credit_id': '596bdc83c3a3684c0200548b', 'department': 'Editing', 'gender': 0, 'id': 1852798, 'job': 'Dialogue Editor', 'name': 'Matt Gorzkowski', 'profile_path': None}, {'credit_id': '596bdcd9c3a3684bcb004d02', 'department': 'Sound', 'gender': 0, 'id': 1852799, 'job': 'Sound Effects Editor', 'name': 'Michael Hanlan', 'profile_path': None}, {'credit_id': '596bdfcac3a3684c02005741', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1852802, 'job': 'Wardrobe Supervisor', 'name': 'Jasmine Murray-Bergquist', 'profile_path': None}] 199591 +25955 [{'cast_id': 1, 'character': 'Johan Falk', 'credit_id': '52fe447dc3a368484e026045', 'gender': 0, 'id': 92403, 'name': 'Jakob Eklund', 'order': 0, 'profile_path': '/r5DTiqVW8rkaQ3biVu75sfVeCBU.jpg'}, {'cast_id': 3, 'character': 'Frank Wagner', 'credit_id': '52fe447dc3a368484e026049', 'gender': 2, 'id': 92404, 'name': 'Joel Kinnaman', 'order': 1, 'profile_path': '/aylKCZLLFuuT9OB5RyaTtNBr0kn.jpg'}, {'cast_id': 4, 'character': 'Patrik Angrell', 'credit_id': '52fe447dc3a368484e02604d', 'gender': 0, 'id': 92408, 'name': 'Mikael Tornving', 'order': 2, 'profile_path': '/vgr3XZBx3uVmBKMLgdcctdIJb6M.jpg'}, {'cast_id': 6, 'character': 'Sophie Nordh', 'credit_id': '52fe447dc3a368484e026057', 'gender': 0, 'id': 1199647, 'name': 'Meliz Karlge', 'order': 3, 'profile_path': '/ydyNcB1SdAaJfVNbYhT34BRrjgf.jpg'}, {'cast_id': 7, 'character': 'Tommy Ridders', 'credit_id': '54c8e85d92514108c7001626', 'gender': 2, 'id': 544030, 'name': 'Reuben Sallmander', 'order': 4, 'profile_path': None}, {'cast_id': 9, 'character': 'Seth Rydell', 'credit_id': '55b4fcc89251417364006736', 'gender': 0, 'id': 92429, 'name': 'Jens Hultén', 'order': 5, 'profile_path': '/3dgP1WpqlKoKhC8GntrKAB3ozFB.jpg'}, {'cast_id': 10, 'character': 'Anja Månsdottir', 
'credit_id': '55b4fd11c3a3682ff90131f7', 'gender': 0, 'id': 90604, 'name': 'Jacqueline Ramel', 'order': 6, 'profile_path': '/ju4a4S8rSouPWR8Vf9TdnfzC4YX.jpg'}, {'cast_id': 13, 'character': 'Dick Jörgensen', 'credit_id': '55b88b30c3a3684af5001213', 'gender': 0, 'id': 6327, 'name': 'André Sjöberg', 'order': 7, 'profile_path': '/mWtHuiUTPJsTX9hTR8dfDkBAobl.jpg'}, {'cast_id': 12, 'character': 'Lasse Karlsson', 'credit_id': '55b88ab092514123dd0014f5', 'gender': 0, 'id': 64975, 'name': 'Henrik Norlén', 'order': 8, 'profile_path': '/wDmJsMvxIbvMJf0ocIjl3uPvp81.jpg'}, {'cast_id': 8, 'character': 'Matte', 'credit_id': '55b4fca292514125da0148ef', 'gender': 2, 'id': 1478585, 'name': 'Zeljko Santrac', 'order': 9, 'profile_path': '/AetgbwQeYM95c9MPTX9yeDJS6Ew.jpg'}, {'cast_id': 11, 'character': 'Helén Andersson', 'credit_id': '55b88a8ac3a36869b40006a0', 'gender': 1, 'id': 5024, 'name': 'Marie Richardson', 'order': 10, 'profile_path': '/2DhxYsLmTybbAewVQBNPflKgYYb.jpg'}, {'cast_id': 14, 'character': 'Marie', 'credit_id': '55b892a8c3a3684af50012df', 'gender': 1, 'id': 106660, 'name': 'Ruth Vega Fernandez', 'order': 11, 'profile_path': '/eMw0Z68r4zVX0rYvjTDJYzTh2K2.jpg'}, {'cast_id': 15, 'character': 'Martin Borhulth', 'credit_id': '57cbde6592514141820021f7', 'gender': 2, 'id': 213486, 'name': 'Martin Wallström', 'order': 12, 'profile_path': '/hL6hCJCX6QJe1q6zgXCHKMOOTnk.jpg'}] [{'credit_id': '52fe447dc3a368484e026053', 'department': 'Directing', 'gender': 0, 'id': 92410, 'job': 'Director', 'name': 'Anders Nilsson', 'profile_path': '/ngwqZEuE0zwEfHHD5YXWd32AcGd.jpg'}] 24023 +25956 [{'cast_id': 1, 'character': 'Johan Falk', 'credit_id': '52fe447dc3a368484e0260eb', 'gender': 0, 'id': 92403, 'name': 'Jakob Eklund', 'order': 0, 'profile_path': '/r5DTiqVW8rkaQ3biVu75sfVeCBU.jpg'}, {'cast_id': 2, 'character': 'Frank Wagner', 'credit_id': '52fe447dc3a368484e0260ef', 'gender': 2, 'id': 92404, 'name': 'Joel Kinnaman', 'order': 1, 'profile_path': '/aylKCZLLFuuT9OB5RyaTtNBr0kn.jpg'}, 
{'cast_id': 4, 'character': 'Patrik Agrell', 'credit_id': '52fe447dc3a368484e0260f3', 'gender': 0, 'id': 92408, 'name': 'Mikael Tornving', 'order': 3, 'profile_path': '/vgr3XZBx3uVmBKMLgdcctdIJb6M.jpg'}, {'cast_id': 5, 'character': 'Anja Månsdottir', 'credit_id': '52fe447dc3a368484e0260f7', 'gender': 0, 'id': 90604, 'name': 'Jacqueline Ramel', 'order': 4, 'profile_path': '/ju4a4S8rSouPWR8Vf9TdnfzC4YX.jpg'}, {'cast_id': 9, 'character': 'Seth Rydell', 'credit_id': '52fe447dc3a368484e026107', 'gender': 0, 'id': 92429, 'name': 'Jens Hultén', 'order': 6, 'profile_path': '/3dgP1WpqlKoKhC8GntrKAB3ozFB.jpg'}, {'cast_id': 10, 'character': 'Felix Rydell', 'credit_id': '52fe447dc3a368484e02610b', 'gender': 2, 'id': 90605, 'name': 'Anastasios Soulis', 'order': 7, 'profile_path': '/aIAPSRQdE14yTedCxt57Vr6IJ4k.jpg'}, {'cast_id': 11, 'character': 'Sophie Nordh', 'credit_id': '52fe447dc3a368484e02610f', 'gender': 0, 'id': 1199647, 'name': 'Meliz Karlge', 'order': 8, 'profile_path': '/ydyNcB1SdAaJfVNbYhT34BRrjgf.jpg'}, {'cast_id': 12, 'character': 'Martin Borhulth', 'credit_id': '57cbde3ec3a3680a58001fe9', 'gender': 2, 'id': 213486, 'name': 'Martin Wallström', 'order': 9, 'profile_path': '/hL6hCJCX6QJe1q6zgXCHKMOOTnk.jpg'}] [{'credit_id': '52fe447dc3a368484e0260fd', 'department': 'Directing', 'gender': 0, 'id': 92410, 'job': 'Director', 'name': 'Anders Nilsson', 'profile_path': '/ngwqZEuE0zwEfHHD5YXWd32AcGd.jpg'}, {'credit_id': '52fe447dc3a368484e026103', 'department': 'Writing', 'gender': 0, 'id': 92405, 'job': 'Writer', 'name': 'Fredrik T. 
Olsson', 'profile_path': None}] 24026 +25957 [{'cast_id': 6, 'character': 'Mila', 'credit_id': '52fe44809251416c7503704d', 'gender': 1, 'id': 70839, 'name': 'Emilia Schüle', 'order': 0, 'profile_path': '/enO42U3OucP6J6yCR0dKcHHwM6h.jpg'}, {'cast_id': 17, 'character': 'Hanna', 'credit_id': '5929a7ca9251413b4104e372', 'gender': 1, 'id': 147945, 'name': 'Selina Shirin Müller', 'order': 1, 'profile_path': None}, {'cast_id': 18, 'character': 'Kati', 'credit_id': '5929a7da9251413b5404c15d', 'gender': 1, 'id': 147944, 'name': 'Henriette Nagel', 'order': 2, 'profile_path': None}, {'cast_id': 4, 'character': 'Markus', 'credit_id': '52fe44809251416c75037045', 'gender': 2, 'id': 70838, 'name': 'Jonathan Beck', 'order': 3, 'profile_path': '/aHgwJz04hft3NQtrnK9EXPYePHv.jpg'}, {'cast_id': 5, 'character': 'Gerard', 'credit_id': '52fe44809251416c75037049', 'gender': 2, 'id': 44890, 'name': 'David Berton', 'order': 4, 'profile_path': '/16DnlrXYs9KTZs1O05zEIR2c2Bb.jpg'}, {'cast_id': 7, 'character': "Mathelehrer 'Rumpelstilzchen'", 'credit_id': '52fe44809251416c75037051', 'gender': 2, 'id': 1083, 'name': 'Armin Rohde', 'order': 5, 'profile_path': '/jUOdSGDo9az8rtpZnP4AzzgydQj.jpg'}, {'cast_id': 8, 'character': 'Milas Mutter', 'credit_id': '52fe44809251416c75037055', 'gender': 1, 'id': 5645, 'name': 'Anke Engelke', 'order': 6, 'profile_path': '/qTmx6f0Hv8DtT8qDFB9yAHHYFMS.jpg'}, {'cast_id': 9, 'character': 'Pit Winter', 'credit_id': '52fe44809251416c75037059', 'gender': 2, 'id': 32425, 'name': 'David Rott', 'order': 7, 'profile_path': '/uEGwt2oWa0gpSAL3GtQmywscOdy.jpg'}, {'cast_id': 10, 'character': 'Brian', 'credit_id': '52fe44809251416c7503705d', 'gender': 2, 'id': 55183, 'name': 'Wilson Gonzalez Ochsenknecht', 'order': 8, 'profile_path': '/4PNS3CjPaEMHOv3IsqcGdiqhCqr.jpg'}, {'cast_id': 11, 'character': 'Frau Kempinski', 'credit_id': '52fe44809251416c75037061', 'gender': 0, 'id': 48683, 'name': 'Anna Böttcher', 'order': 9, 'profile_path': '/lhheQFGas1BMZs3wvxDwiUdeSHT.jpg'}, 
{'cast_id': 12, 'character': 'Mc Donald', 'credit_id': '52fe44809251416c75037065', 'gender': 0, 'id': 51827, 'name': 'Piet Klocke', 'order': 10, 'profile_path': None}, {'cast_id': 19, 'character': 'Vater von Markus', 'credit_id': '5929a80bc3a36877fd044f9a', 'gender': 2, 'id': 37035, 'name': 'Christian Tramitz', 'order': 11, 'profile_path': '/4exdszmCciDOd1C4F2VRwVShRvk.jpg'}, {'cast_id': 20, 'character': 'Sportleherer', 'credit_id': '5929a815c3a3687870049214', 'gender': 2, 'id': 19927, 'name': 'Michael Kessler', 'order': 12, 'profile_path': '/9367CR0Bkl7avU8pIxR8senGPSQ.jpg'}, {'cast_id': 21, 'character': 'Branko', 'credit_id': '5929a825c3a36877ee04f69e', 'gender': 2, 'id': 147947, 'name': 'Ben Unterkofler', 'order': 13, 'profile_path': None}, {'cast_id': 22, 'character': 'Knolle', 'credit_id': '5929a830c3a368783e04ecd8', 'gender': 0, 'id': 147948, 'name': 'Vincent Bruder', 'order': 14, 'profile_path': None}, {'cast_id': 23, 'character': 'Vanessa', 'credit_id': '5929a846c3a36877fd044fce', 'gender': 1, 'id': 147946, 'name': 'Christina Peifer', 'order': 15, 'profile_path': None}, {'cast_id': 24, 'character': 'Kiwi', 'credit_id': '5929a84fc3a36877bc054796', 'gender': 0, 'id': 147951, 'name': 'Marius Weingarten', 'order': 16, 'profile_path': None}, {'cast_id': 25, 'character': 'Martin', 'credit_id': '5929a864c3a36877df0518b4', 'gender': 0, 'id': 1258918, 'name': 'Sebastian Bender', 'order': 17, 'profile_path': None}, {'cast_id': 26, 'character': 'Hotelrezeptionistin', 'credit_id': '5929a876c3a36877bc0547d0', 'gender': 0, 'id': 18938, 'name': 'Anna Böger', 'order': 18, 'profile_path': None}, {'cast_id': 27, 'character': 'UPS-Fahrer', 'credit_id': '5929a886c3a36877bc0547e4', 'gender': 0, 'id': 1824599, 'name': 'George le Bonsai', 'order': 19, 'profile_path': None}, {'cast_id': 28, 'character': 'Florian', 'credit_id': '5929a891c3a36877fd045013', 'gender': 0, 'id': 1824600, 'name': 'Axel Kellermann', 'order': 20, 'profile_path': None}, {'cast_id': 29, 'character': 
'Birefträger', 'credit_id': '5929a89d9251413ba00498c8', 'gender': 0, 'id': 1824601, 'name': 'Thomas Herget', 'order': 21, 'profile_path': None}, {'cast_id': 30, 'character': 'Betrunkener', 'credit_id': '5929a8a89251413b5004d257', 'gender': 0, 'id': 1824602, 'name': 'Peter Bosch', 'order': 22, 'profile_path': None}, {'cast_id': 31, 'character': 'Rocker', 'credit_id': '5929a8ba9251413b8a04d86d', 'gender': 0, 'id': 1824603, 'name': 'Lars Flessner', 'order': 23, 'profile_path': None}, {'cast_id': 32, 'character': 'Assistent', 'credit_id': '5929a8c69251413b5404c223', 'gender': 0, 'id': 1824604, 'name': 'Felix Kodron', 'order': 24, 'profile_path': None}, {'cast_id': 33, 'character': 'ältere Dame', 'credit_id': '5929a8d2c3a368787004929f', 'gender': 0, 'id': 1474905, 'name': 'Gerda Böken', 'order': 25, 'profile_path': None}, {'cast_id': 34, 'character': 'älterer Herr', 'credit_id': '5929a8e3c3a36878700492ad', 'gender': 0, 'id': 1824605, 'name': 'Jo Betzing', 'order': 26, 'profile_path': None}, {'cast_id': 35, 'character': 'Gitarrist in Brians Band', 'credit_id': '5929a8f8c3a368782d04a9d0', 'gender': 0, 'id': 1824606, 'name': 'Florian Korth', 'order': 27, 'profile_path': None}, {'cast_id': 36, 'character': 'Schlagzeuger in Brians Band', 'credit_id': '5929a9029251413b4104e470', 'gender': 0, 'id': 1824607, 'name': 'Felix Wagner', 'order': 28, 'profile_path': None}, {'cast_id': 37, 'character': 'Bassist in Brians Band', 'credit_id': '5929a997c3a368782d04aa3b', 'gender': 0, 'id': 1824608, 'name': 'Robin Schulz', 'order': 29, 'profile_path': None}, {'cast_id': 38, 'character': 'Rhythmusgitarrist in Brians Band', 'credit_id': '5929a9a79251413b4104e4ed', 'gender': 0, 'id': 1824610, 'name': 'Julian Sommer', 'order': 30, 'profile_path': None}, {'cast_id': 39, 'character': 'schöne Frau', 'credit_id': '5929a9b2c3a36877df0519bd', 'gender': 0, 'id': 1824611, 'name': 'Helene Walter', 'order': 31, 'profile_path': None}, {'cast_id': 40, 'character': 'Kollege des Mannes bei der Absperrung', 
'credit_id': '5929aa069251413b8a04d98e', 'gender': 0, 'id': 1824612, 'name': 'Curtis Knight', 'order': 32, 'profile_path': None}, {'cast_id': 41, 'character': 'Bayer', 'credit_id': '5929aa0e9251413b5004d37e', 'gender': 0, 'id': 1824613, 'name': 'Peter Sauckel', 'order': 33, 'profile_path': None}, {'cast_id': 42, 'character': 'Rasta', 'credit_id': '5929aa179251413b5004d393', 'gender': 0, 'id': 1824614, 'name': 'Christian Dzielak', 'order': 34, 'profile_path': None}, {'cast_id': 43, 'character': 'Italiener', 'credit_id': '5929aa2ac3a368783e04eeab', 'gender': 0, 'id': 1824615, 'name': 'Sascha Bogdanovic', 'order': 35, 'profile_path': None}, {'cast_id': 44, 'character': 'Unternehmensberater', 'credit_id': '5929aa34c3a36877df051a3e', 'gender': 0, 'id': 1824616, 'name': 'Marc Wandt', 'order': 36, 'profile_path': None}] [{'credit_id': '52fe44809251416c75037035', 'department': 'Directing', 'gender': 1, 'id': 19912, 'job': 'Director', 'name': 'Ute Wieland', 'profile_path': None}, {'credit_id': '52fe44809251416c7503703b', 'department': 'Writing', 'gender': 0, 'id': 70837, 'job': 'Novel', 'name': 'Bianka-Minte König', 'profile_path': None}, {'credit_id': '52fe44809251416c75037041', 'department': 'Writing', 'gender': 0, 'id': 57364, 'job': 'Screenplay', 'name': 'Maggie Peren', 'profile_path': None}, {'credit_id': '52fe44809251416c7503706b', 'department': 'Camera', 'gender': 2, 'id': 19917, 'job': 'Director of Photography', 'name': 'Peter Przybylski', 'profile_path': None}, {'credit_id': '52fe44809251416c75037071', 'department': 'Production', 'gender': 0, 'id': 55988, 'job': 'Casting', 'name': 'Nicole Fischer', 'profile_path': None}, {'credit_id': '52fe44809251416c75037077', 'department': 'Production', 'gender': 0, 'id': 32959, 'job': 'Producer', 'name': 'Ulrich Limmer', 'profile_path': None}] 11752 +25965 [] [{'credit_id': '52fe4ae59251416c750f19b3', 'department': 'Directing', 'gender': 0, 'id': 1117498, 'job': 'Director', 'name': 'PES', 'profile_path': 
'/ptdvjxDeklgm0vv8hCYHIDiu50K.jpg'}, {'credit_id': '52fe4ae59251416c750f19b9', 'department': 'Writing', 'gender': 0, 'id': 1117498, 'job': 'Writer', 'name': 'PES', 'profile_path': '/ptdvjxDeklgm0vv8hCYHIDiu50K.jpg'}] 142563 +25966 [] [{'credit_id': '52fe4bbe9251416c910e4533', 'department': 'Directing', 'gender': 0, 'id': 1138629, 'job': 'Director', 'name': 'Minkyu Lee', 'profile_path': None}, {'credit_id': '55258026c3a3687dfe001f9a', 'department': 'Visual Effects', 'gender': 0, 'id': 1450331, 'job': 'Animation', 'name': 'James Baxter', 'profile_path': '/dRTLlsnpqDTqfqMJ0OHDUnPaJEY.jpg'}, {'credit_id': '554aeb8292514146a5000d6c', 'department': 'Visual Effects', 'gender': 0, 'id': 1464456, 'job': 'Color Designer', 'name': 'John Dusenberry', 'profile_path': None}, {'credit_id': '59044e5ac3a3684b0700889e', 'department': 'Visual Effects', 'gender': 0, 'id': 1717778, 'job': 'Animation', 'name': 'Jennifer Hager', 'profile_path': None}, {'credit_id': '59044e6dc3a3684a4e009634', 'department': 'Visual Effects', 'gender': 0, 'id': 1138629, 'job': 'Animation', 'name': 'Minkyu Lee', 'profile_path': None}, {'credit_id': '59044e93c3a3684af0008a0d', 'department': 'Visual Effects', 'gender': 0, 'id': 1806625, 'job': 'Animation', 'name': 'Austin Madison', 'profile_path': None}, {'credit_id': '59044ea7c3a3684af0008a20', 'department': 'Visual Effects', 'gender': 0, 'id': 1461156, 'job': 'Animation', 'name': 'Matt Williames', 'profile_path': None}, {'credit_id': '59044ecdc3a3684ad7009422', 'department': 'Visual Effects', 'gender': 0, 'id': 1461392, 'job': 'Animation', 'name': 'Mario Furmanczyk', 'profile_path': None}, {'credit_id': '59044f3cc3a3684ab400985c', 'department': 'Production', 'gender': 0, 'id': 1806628, 'job': 'Associate Producer', 'name': 'Heidi Jo Gilbert', 'profile_path': None}, {'credit_id': '59044f7cc3a3684a76008c86', 'department': 'Production', 'gender': 0, 'id': 139474, 'job': 'Consulting Producer', 'name': 'Glen Keane', 'profile_path': 
'/wFxCDcFi1PRl894WlTneXEKsvzl.jpg'}, {'credit_id': '59044f9fc3a3684ab40098b2', 'department': 'Production', 'gender': 0, 'id': 1806630, 'job': 'Consulting Producer', 'name': 'Thomas Ethan Harris', 'profile_path': None}, {'credit_id': '59044ff1c3a3684ab40098f4', 'department': 'Crew', 'gender': 0, 'id': 1806633, 'job': 'Technical Supervisor', 'name': 'Ethan Metzger', 'profile_path': None}, {'credit_id': '5904501f92514169d8009669', 'department': 'Sound', 'gender': 0, 'id': 1806634, 'job': 'Sound Designer', 'name': 'John Max Repka', 'profile_path': None}, {'credit_id': '5904503792514169c8009904', 'department': 'Crew', 'gender': 2, 'id': 961277, 'job': 'Score Engineer', 'name': 'Joey Newman', 'profile_path': None}] 157301 +25967 [{'cast_id': 1, 'character': 'Churchill', 'credit_id': '52fe4527c3a36847f80bf5ab', 'gender': 2, 'id': 18704, 'name': 'Joe Lando', 'order': 0, 'profile_path': '/6QiYKS5eNpLsVW71M0ZGaTMZJm7.jpg'}, {'cast_id': 2, 'character': 'Damian', 'credit_id': '52fe4527c3a36847f80bf5af', 'gender': 2, 'id': 58905, 'name': 'Dominic Zamprogna', 'order': 1, 'profile_path': '/8iC5CVTtE2hJteoZswP0Lldd6L3.jpg'}, {'cast_id': 3, 'character': 'Quintana', 'credit_id': '52fe4527c3a36847f80bf5b3', 'gender': 1, 'id': 21430, 'name': 'Natassia Malthe', 'order': 2, 'profile_path': '/9LuAHAshJ585iaXBxVpXVJIlC0D.jpg'}, {'cast_id': 5, 'character': 'Roman', 'credit_id': '52fe4527c3a36847f80bf5b7', 'gender': 2, 'id': 27128, 'name': 'Aaron Pearl', 'order': 4, 'profile_path': '/bgc9RfqNqK4pPVmt4FWJtlCS1fz.jpg'}, {'cast_id': 6, 'character': 'Fiona', 'credit_id': '52fe4527c3a36847f80bf5bb', 'gender': 1, 'id': 17236, 'name': 'A.J. 
Cook', 'order': 5, 'profile_path': '/tvEjGDQVuu7jiOvWXwEU6tEE7NW.jpg'}, {'cast_id': 7, 'character': 'Gilles', 'credit_id': '52fe4527c3a36847f80bf5bf', 'gender': 2, 'id': 55557, 'name': 'Michael DeLuise', 'order': 6, 'profile_path': '/ogvSHUb0jakiR7CyIph7TfQp9gV.jpg'}, {'cast_id': 8, 'character': 'Muco', 'credit_id': '52fe4527c3a36847f80bf5c3', 'gender': 2, 'id': 11086, 'name': 'Michael Ironside', 'order': 7, 'profile_path': '/i3HkCGEnQGJeD0U8WDumO4VH5fU.jpg'}, {'cast_id': 9, 'character': 'Phleg', 'credit_id': '52fe4527c3a36847f80bf5c7', 'gender': 2, 'id': 58906, 'name': 'David Palffy', 'order': 8, 'profile_path': None}, {'cast_id': 10, 'character': 'Officer Brackish', 'credit_id': '52fe4527c3a36847f80bf5cb', 'gender': 2, 'id': 58907, 'name': 'Elias Toufexis', 'order': 9, 'profile_path': '/7lUw0pCCKEG98Hve3H4EAFERG1l.jpg'}, {'cast_id': 11, 'character': "Damian's wife", 'credit_id': '52fe4527c3a36847f80bf5cf', 'gender': 0, 'id': 58908, 'name': 'Carrie Fleming', 'order': 10, 'profile_path': '/k4lZO5dXqnZ2Q9yTgHN9hc1r1wx.jpg'}, {'cast_id': 12, 'character': 'Woman', 'credit_id': '52fe4527c3a36847f80bf5d3', 'gender': 0, 'id': 58909, 'name': 'Charisse Baker', 'order': 11, 'profile_path': None}, {'cast_id': 13, 'character': 'Ble-Ka', 'credit_id': '52fe4527c3a36847f80bf5d7', 'gender': 2, 'id': 43299, 'name': 'John DeSantis', 'order': 12, 'profile_path': '/4jD0lw9Mo1iTJ5mbwkRpsWqPDBR.jpg'}, {'cast_id': 14, 'character': 'Worm Host Creature', 'credit_id': '52fe4527c3a36847f80bf5db', 'gender': 0, 'id': 58910, 'name': 'Geoff Redknap', 'order': 13, 'profile_path': None}, {'cast_id': 15, 'character': "Roman's Mom", 'credit_id': '52fe4527c3a36847f80bf5df', 'gender': 0, 'id': 58911, 'name': 'Krista Bell', 'order': 14, 'profile_path': '/jIF67I0hjhpUVHboc761n2XNwI2.jpg'}, {'cast_id': 22, 'character': 'Rosa', 'credit_id': '52fe4527c3a36847f80bf607', 'gender': 0, 'id': 25388, 'name': 'Leanne Adachi', 'order': 15, 'profile_path': None}] [{'credit_id': '52fe4527c3a36847f80bf5e5', 
'department': 'Directing', 'gender': 2, 'id': 58912, 'job': 'Director', 'name': 'Matthew Hastings', 'profile_path': None}, {'credit_id': '52fe4527c3a36847f80bf5eb', 'department': 'Writing', 'gender': 2, 'id': 58912, 'job': 'Author', 'name': 'Matthew Hastings', 'profile_path': None}, {'credit_id': '52fe4527c3a36847f80bf5f1', 'department': 'Production', 'gender': 0, 'id': 63954, 'job': 'Producer', 'name': 'Gilles LaPlante', 'profile_path': None}, {'credit_id': '52fe4527c3a36847f80bf5f7', 'department': 'Sound', 'gender': 0, 'id': 63955, 'job': 'Original Music Composer', 'name': 'Davor Vulama', 'profile_path': None}, {'credit_id': '52fe4527c3a36847f80bf5fd', 'department': 'Camera', 'gender': 2, 'id': 63956, 'job': 'Director of Photography', 'name': 'Eric J. Goldstein', 'profile_path': None}, {'credit_id': '52fe4527c3a36847f80bf603', 'department': 'Editing', 'gender': 2, 'id': 63957, 'job': 'Editor', 'name': 'Garry M.B. Smith', 'profile_path': None}] 9755 +25968 [{'cast_id': 6, 'character': 'Ray', 'credit_id': '52fe4a99c3a368484e15cc59', 'gender': 0, 'id': 1078398, 'name': 'Aaron C. 
Peer', 'order': 0, 'profile_path': None}, {'cast_id': 7, 'character': 'JP', 'credit_id': '52fe4a99c3a368484e15cc5d', 'gender': 0, 'id': 1078404, 'name': 'Charlie Alberto', 'order': 1, 'profile_path': None}, {'cast_id': 9, 'character': 'Donna', 'credit_id': '52fe4a99c3a368484e15cc65', 'gender': 0, 'id': 1078405, 'name': 'Joan Alberto', 'order': 3, 'profile_path': None}, {'cast_id': 10, 'character': 'Neighhbor', 'credit_id': '52fe4a99c3a368484e15cc69', 'gender': 0, 'id': 1078406, 'name': 'Matt Conrad', 'order': 4, 'profile_path': None}, {'cast_id': 11, 'character': 'Lawyer', 'credit_id': '52fe4a99c3a368484e15cc6d', 'gender': 0, 'id': 1078407, 'name': 'Jim Goodling', 'order': 5, 'profile_path': None}, {'cast_id': 12, 'character': '911 Operator', 'credit_id': '52fe4a99c3a368484e15cc71', 'gender': 0, 'id': 1078408, 'name': 'Kasia Olczac', 'order': 6, 'profile_path': None}, {'cast_id': 13, 'character': 'Lisa', 'credit_id': '56194092925141374b002b25', 'gender': 1, 'id': 579551, 'name': 'Lana Del Rey', 'order': 7, 'profile_path': '/h67gzOEMgqcCEY5nXFtt1NRJjTF.jpg'}] [{'credit_id': '52fe4a99c3a368484e15cc3d', 'department': 'Directing', 'gender': 0, 'id': 1078398, 'job': 'Director', 'name': 'Aaron C. Peer', 'profile_path': None}, {'credit_id': '52fe4a99c3a368484e15cc43', 'department': 'Editing', 'gender': 0, 'id': 1078398, 'job': 'Editor', 'name': 'Aaron C. 
Peer', 'profile_path': None}, {'credit_id': '52fe4a99c3a368484e15cc55', 'department': 'Production', 'gender': 0, 'id': 1078399, 'job': 'Executive Producer', 'name': 'Benham Jones', 'profile_path': None}, {'credit_id': '52fe4a99c3a368484e15cc49', 'department': 'Writing', 'gender': 0, 'id': 1078399, 'job': 'Writer', 'name': 'Benham Jones', 'profile_path': None}, {'credit_id': '52fe4a99c3a368484e15cc4f', 'department': 'Sound', 'gender': 0, 'id': 1078401, 'job': 'Music', 'name': 'Austin Peer', 'profile_path': None}] 123634 +25969 [{'cast_id': 11, 'character': 'Mike Gaucher / Bruno Ferrari', 'credit_id': '52fe44b8c3a36847f80a6393', 'gender': 2, 'id': 3829, 'name': 'Jean-Paul Belmondo', 'order': 0, 'profile_path': '/iiik4HMqRzDoVyqugLLueHcAsiN.jpg'}, {'cast_id': 1, 'character': 'Jane Gardner', 'credit_id': '52fe44b8c3a36847f80a6367', 'gender': 1, 'id': 21462, 'name': 'Raquel Welch', 'order': 1, 'profile_path': '/dEr7QkkgOqCwH1uc5VXzTxDveAv.jpg'}, {'cast_id': 2, 'character': 'Doris', 'credit_id': '52fe44b8c3a36847f80a636b', 'gender': 1, 'id': 39887, 'name': 'Dany Saval', 'order': 2, 'profile_path': '/yDPcqoedlh5JOb6l7NEKi7Ot7zs.jpg'}, {'cast_id': 3, 'character': 'Le comte de Saint-Prix', 'credit_id': '52fe44b8c3a36847f80a636f', 'gender': 2, 'id': 26879, 'name': 'Raymond Gérôme', 'order': 3, 'profile_path': '/cbvas4hfhtKJklv4N298emJbVZ4.jpg'}, {'cast_id': 4, 'character': 'La vedette féminine', 'credit_id': '52fe44b8c3a36847f80a6373', 'gender': 1, 'id': 15200, 'name': 'Jane Birkin', 'order': 4, 'profile_path': '/e62bIWn1XolFTD5uZ47e3XH8xXK.jpg'}, {'cast_id': 5, 'character': 'La vedette masculine', 'credit_id': '52fe44b8c3a36847f80a6377', 'gender': 0, 'id': 35084, 'name': 'Johnny Hallyday', 'order': 5, 'profile_path': '/8MLIIaLiXo3hHAULYAvcXJNmCbt.jpg'}, {'cast_id': 6, 'character': 'Yves Mourousi', 'credit_id': '52fe44b8c3a36847f80a637b', 'gender': 0, 'id': 55919, 'name': 'Yves Mourousi', 'order': 6, 'profile_path': None}, {'cast_id': 7, 'character': 'Hyacinthe', 
'credit_id': '52fe44b8c3a36847f80a637f', 'gender': 2, 'id': 55920, 'name': 'Charles Gérard', 'order': 7, 'profile_path': '/zwr3TgaWFH8Vvqgv4hcWhazfDx8.jpg'}, {'cast_id': 8, 'character': 'Le metteur en scène', 'credit_id': '52fe44b8c3a36847f80a6383', 'gender': 0, 'id': 19069, 'name': 'Claude Chabrol', 'order': 8, 'profile_path': '/wth1muwK6ZnwAHw2GPN631cUJpe.jpg'}, {'cast_id': 16, 'character': 'Sergio Campanese', 'credit_id': '52fe44b8c3a36847f80a63af', 'gender': 2, 'id': 21236, 'name': 'Aldo Maccione', 'order': 9, 'profile_path': '/qe5isTBNkm5KHHH0Pzo1SLVjVSr.jpg'}] [{'credit_id': '52fe44b8c3a36847f80a6389', 'department': 'Directing', 'gender': 2, 'id': 11201, 'job': 'Director', 'name': 'Claude Zidi', 'profile_path': '/tXp3e0IIwVMzNlztC6ufVdPKQ8Q.jpg'}, {'credit_id': '52fe44b8c3a36847f80a638f', 'department': 'Writing', 'gender': 2, 'id': 18565, 'job': 'Screenplay', 'name': 'Michel Audiard', 'profile_path': '/v1ZikQ0sKjG1SQaK6S4SvEFm4o0.jpg'}, {'credit_id': '52fe44b8c3a36847f80a6399', 'department': 'Production', 'gender': 2, 'id': 11203, 'job': 'Producer', 'name': 'Christian Fechner', 'profile_path': None}, {'credit_id': '52fe44b8c3a36847f80a639f', 'department': 'Production', 'gender': 0, 'id': 56280, 'job': 'Producer', 'name': 'René Malo', 'profile_path': None}, {'credit_id': '52fe44b8c3a36847f80a63a5', 'department': 'Sound', 'gender': 2, 'id': 1965, 'job': 'Original Music Composer', 'name': 'Vladimir Cosma', 'profile_path': '/ikwjkuMpSWcIwZPUt7u0lsVyUuA.jpg'}, {'credit_id': '52fe44b8c3a36847f80a63ab', 'department': 'Camera', 'gender': 2, 'id': 10469, 'job': 'Director of Photography', 'name': 'Claude Renoir', 'profile_path': '/41xD6gvddFwSUvVBGO5mvU6AOaU.jpg'}] 8767 +25974 [{'cast_id': 2, 'character': 'The Man', 'credit_id': '52fe4655c3a36847f80f96c3', 'gender': 2, 'id': 70230, 'name': 'Jeremy Theobald', 'order': 0, 'profile_path': '/ij6jFUCMnd2WTicjq4fu9ELPdWS.jpg'}] [{'credit_id': '52fe4655c3a36847f80f96bf', 'department': 'Directing', 'gender': 2, 'id': 525, 
'job': 'Director', 'name': 'Christopher Nolan', 'profile_path': '/7OGmfDF4VHLLgbjxuEwTj3ga0uQ.jpg'}, {'credit_id': '52fe4655c3a36847f80f96c9', 'department': 'Writing', 'gender': 2, 'id': 525, 'job': 'Writer', 'name': 'Christopher Nolan', 'profile_path': '/7OGmfDF4VHLLgbjxuEwTj3ga0uQ.jpg'}, {'credit_id': '52fe4655c3a36847f80f96cf', 'department': 'Editing', 'gender': 2, 'id': 525, 'job': 'Editor', 'name': 'Christopher Nolan', 'profile_path': '/7OGmfDF4VHLLgbjxuEwTj3ga0uQ.jpg'}, {'credit_id': '52fe4655c3a36847f80f96d5', 'department': 'Crew', 'gender': 2, 'id': 525, 'job': 'Cinematography', 'name': 'Christopher Nolan', 'profile_path': '/7OGmfDF4VHLLgbjxuEwTj3ga0uQ.jpg'}, {'credit_id': '52fe4655c3a36847f80f96db', 'department': 'Production', 'gender': 1, 'id': 556, 'job': 'Producer', 'name': 'Emma Thomas', 'profile_path': '/f12TsNlAEomPdwxueS7ImMoJtsP.jpg'}, {'credit_id': '52fe4656c3a36847f80f96e1', 'department': 'Sound', 'gender': 2, 'id': 558, 'job': 'Original Music Composer', 'name': 'David Julyan', 'profile_path': None}, {'credit_id': '587fb24c92514107d5002c40', 'department': 'Production', 'gender': 0, 'id': 1392627, 'job': 'Producer', 'name': 'Steve Street', 'profile_path': None}, {'credit_id': '595e492dc3a368253c07b7f2', 'department': 'Art', 'gender': 0, 'id': 1846027, 'job': 'Set Designer', 'name': 'Alberto Mattiussi', 'profile_path': None}, {'credit_id': '595e4934c3a368265d085a8f', 'department': 'Art', 'gender': 2, 'id': 525, 'job': 'Set Designer', 'name': 'Christopher Nolan', 'profile_path': '/7OGmfDF4VHLLgbjxuEwTj3ga0uQ.jpg'}, {'credit_id': '595e4994c3a368265d085afd', 'department': 'Sound', 'gender': 0, 'id': 1846029, 'job': 'Sound', 'name': 'David Lloyd', 'profile_path': None}, {'credit_id': '595e499e9251410c56084264', 'department': 'Crew', 'gender': 0, 'id': 1846008, 'job': 'Special Effects', 'name': 'Ivan Cornell', 'profile_path': None}, {'credit_id': '595e49a6c3a368265d085b11', 'department': 'Camera', 'gender': 2, 'id': 527, 'job': 'Grip', 'name': 'Jonathan 
Nolan', 'profile_path': '/rYBQ8M3hDDY0eThVIvWHmrf4i0Y.jpg'}] 43629 +25975 [] [{'credit_id': '52fe4d119251416c7512ed39', 'department': 'Writing', 'gender': 0, 'id': 448930, 'job': 'Screenplay', 'name': "Shane O'Sullivan", 'profile_path': None}, {'credit_id': '52fe4d119251416c7512ed33', 'department': 'Directing', 'gender': 0, 'id': 448930, 'job': 'Director', 'name': "Shane O'Sullivan", 'profile_path': None}] 187156 +28873 [{'cast_id': 3, 'character': 'Nana', 'credit_id': '550706a39251412c050049d7', 'gender': 0, 'id': 43192, 'name': 'Katya Berger', 'order': 1, 'profile_path': None}, {'cast_id': 4, 'character': 'Count Muffat', 'credit_id': '550706c9c3a3686b750022ac', 'gender': 2, 'id': 18766, 'name': 'Jean-Pierre Aumont', 'order': 2, 'profile_path': '/zmH3P6Wr5hkiRo8Rx060nhvAEdX.jpg'}, {'cast_id': 5, 'character': 'Steiner', 'credit_id': '5507070792514174b60045bf', 'gender': 2, 'id': 39784, 'name': 'Yehuda Efroni', 'order': 3, 'profile_path': None}, {'cast_id': 6, 'character': 'Sabine', 'credit_id': '5507077cc3a3682529004aa0', 'gender': 2, 'id': 39784, 'name': 'Yehuda Efroni', 'order': 4, 'profile_path': None}, {'cast_id': 7, 'character': 'Faucherie', 'credit_id': '550707a292514179fb00229a', 'gender': 2, 'id': 14148, 'name': 'Massimo Serato', 'order': 5, 'profile_path': '/vQlY5M9F1KERGi5xr35ynderDMy.jpg'}, {'cast_id': 8, 'character': 'Satin', 'credit_id': '550707bac3a3682529004aaf', 'gender': 1, 'id': 20117, 'name': 'Debra Berger', 'order': 6, 'profile_path': None}, {'cast_id': 9, 'character': 'Zoe', 'credit_id': '550707d2c3a3683d78000b42', 'gender': 1, 'id': 1229207, 'name': 'Shirin Taylor', 'order': 7, 'profile_path': None}, {'cast_id': 10, 'character': 'Rennée de Chéselles', 'credit_id': '550707f1c3a36862e9004eb9', 'gender': 0, 'id': 101576, 'name': 'Annie Belle', 'order': 8, 'profile_path': '/vmLEPb1WZ1eIbGcjDnMaWFt1lJL.jpg'}, {'cast_id': 11, 'character': 'Xavier', 'credit_id': '550708249251412c050049ec', 'gender': 2, 'id': 101549, 'name': 'Paul Müller', 'order': 9, 
'profile_path': '/v3tZduw1pzCx6eeoYO6eWkwB0cu.jpg'}, {'cast_id': 12, 'character': 'Hector Muffat', 'credit_id': '5507083fc3a36862e9004eca', 'gender': 0, 'id': 1440944, 'name': 'Marcus Beresford', 'order': 10, 'profile_path': None}, {'cast_id': 13, 'character': 'Fontan', 'credit_id': '5507085ec3a368262e004989', 'gender': 0, 'id': 172953, 'name': 'Robert Bridges', 'order': 11, 'profile_path': None}, {'cast_id': 14, 'character': 'George Mellies', 'credit_id': '5507087c9251416cb7002893', 'gender': 2, 'id': 39432, 'name': 'Tom Felleghy', 'order': 12, 'profile_path': '/7Q1KOJeCp2ZODPiSVQXqrmXr5QE.jpg'}] [{'credit_id': '550706449251412c050049d2', 'department': 'Writing', 'gender': 2, 'id': 29496, 'job': 'Writer', 'name': 'Marc Behm', 'profile_path': None}, {'credit_id': '5507066c9251416cb700287d', 'department': 'Writing', 'gender': 2, 'id': 87667, 'job': 'Writer', 'name': 'Émile Zola', 'profile_path': '/oXA6DkOtc1xYaKmmbAUc2l56UEh.jpg'}, {'credit_id': '550705c99251411db4002d6c', 'department': 'Directing', 'gender': 0, 'id': 139107, 'job': 'Director', 'name': 'Dan Wolman', 'profile_path': '/qr83K1R6gbT4oq8WAWFx16k4GOA.jpg'}] 168538 +29387 [{'cast_id': 4, 'character': 'King Lear', 'credit_id': '52fe4606c3a36847f80e7e7b', 'gender': 2, 'id': 13324, 'name': 'Paul Scofield', 'order': 0, 'profile_path': '/gt4YFHyhE29CfpiZxknPy0aAGx.jpg'}, {'cast_id': 5, 'character': 'Goneril', 'credit_id': '52fe4606c3a36847f80e7e7f', 'gender': 1, 'id': 97337, 'name': 'Irene Worth', 'order': 1, 'profile_path': '/bezsJWpUQrgN9X9V73vABKbf4fQ.jpg'}, {'cast_id': 6, 'character': 'Albany', 'credit_id': '52fe4606c3a36847f80e7e83', 'gender': 2, 'id': 4973, 'name': 'Cyril Cusack', 'order': 2, 'profile_path': '/5d6OeKNqpRSMhwXd1FHV9KeX4WS.jpg'}, {'cast_id': 12, 'character': 'Regan', 'credit_id': '54aa9aee92514156460018ee', 'gender': 0, 'id': 1220081, 'name': 'Susan Engel', 'order': 3, 'profile_path': None}, {'cast_id': 8, 'character': 'Kent', 'credit_id': '52fe4606c3a36847f80e7e8b', 'gender': 2, 'id': 
134664, 'name': 'Tom Fleming', 'order': 4, 'profile_path': '/ct5xnZkrhoE0YVxNmm4pYZcCNIR.jpg'}, {'cast_id': 9, 'character': 'Cordelia', 'credit_id': '52fe4606c3a36847f80e7e8f', 'gender': 1, 'id': 147017, 'name': 'Anne-Lise Gabold', 'order': 5, 'profile_path': None}, {'cast_id': 10, 'character': 'Edmund', 'credit_id': '52fe4606c3a36847f80e7e93', 'gender': 2, 'id': 25666, 'name': 'Ian Hogg', 'order': 6, 'profile_path': None}, {'cast_id': 11, 'character': 'Edgar', 'credit_id': '52fe4606c3a36847f80e7e97', 'gender': 2, 'id': 147018, 'name': 'Robert Langdon Lloyd', 'order': 7, 'profile_path': None}] [{'credit_id': '52fe4606c3a36847f80e7e77', 'department': 'Writing', 'gender': 2, 'id': 6210, 'job': 'Author', 'name': 'William Shakespeare', 'profile_path': '/2z4njqosEQM4g26ttb9grG9rJUg.jpg'}, {'credit_id': '52fe4606c3a36847f80e7e6b', 'department': 'Directing', 'gender': 2, 'id': 60981, 'job': 'Director', 'name': 'Peter Brook', 'profile_path': '/1xkrcl5YFJBELNWb4k0qxFmv27i.jpg'}, {'credit_id': '52fe4606c3a36847f80e7e71', 'department': 'Writing', 'gender': 2, 'id': 60981, 'job': 'Writer', 'name': 'Peter Brook', 'profile_path': '/1xkrcl5YFJBELNWb4k0qxFmv27i.jpg'}] 42495 +33755 [{'cast_id': 4, 'character': 'Zak Gibbs', 'credit_id': '52fe46319251416c75070503', 'gender': 2, 'id': 17772, 'name': 'Jesse Bradford', 'order': 0, 'profile_path': '/irD0iVNTmR0kEkVTZmKTuZejbGH.jpg'}, {'cast_id': 5, 'character': 'Francesca', 'credit_id': '52fe46319251416c75070507', 'gender': 1, 'id': 21124, 'name': 'Paula Garcés', 'order': 1, 'profile_path': '/bc1NhQCaUsoJFCi6vIGxiljQl86.jpg'}, {'cast_id': 6, 'character': 'Dr. Gibbs', 'credit_id': '52fe46319251416c7507050b', 'gender': 2, 'id': 41465, 'name': 'Robin Thomas', 'order': 2, 'profile_path': '/seivBO3KRFAKqNgL3wnvpwxHwKz.jpg'}, {'cast_id': 7, 'character': 'Dr. 
Earl Dopler', 'credit_id': '52fe46319251416c7507050f', 'gender': 2, 'id': 67711, 'name': 'French Stewart', 'order': 3, 'profile_path': '/8IpVIyRvwRAeWjsGtM7eAcZlj4t.jpg'}, {'cast_id': 8, 'character': 'Henry Gates', 'credit_id': '52fe46319251416c75070513', 'gender': 2, 'id': 2712, 'name': 'Michael Biehn', 'order': 4, 'profile_path': '/sTk3ltlHRvRg5quheWBxaLdLgC8.jpg'}, {'cast_id': 9, 'character': 'Jenny Gibbs', 'credit_id': '52fe46319251416c75070517', 'gender': 1, 'id': 11806, 'name': 'Julia Sweeney', 'order': 5, 'profile_path': '/mOl6ecUZ1F6XIh1jvPnxzkrRMXn.jpg'}, {'cast_id': 10, 'character': 'Meeker', 'credit_id': '52fe46319251416c7507051b', 'gender': 2, 'id': 69119, 'name': 'Garikayi Mutambirwa', 'order': 6, 'profile_path': '/51Og9NEDb6rHpovDcEOlWO0fyGE.jpg'}, {'cast_id': 11, 'character': 'Kelly Gibbs', 'credit_id': '52fe46319251416c7507051f', 'gender': 1, 'id': 90423, 'name': 'Lindze Letherman', 'order': 7, 'profile_path': '/zFgbXVCZgHaO1kAsShaDBN17SVU.jpg'}, {'cast_id': 12, 'character': 'Richard', 'credit_id': '52fe46319251416c75070523', 'gender': 2, 'id': 95475, 'name': 'Jason George', 'order': 8, 'profile_path': '/x5rlEblgWx6Lvm9vwNQr83upl9Z.jpg'}, {'cast_id': 72, 'character': 'NSA Agent Moore', 'credit_id': '54aff343c3a368399e00474e', 'gender': 2, 'id': 38570, 'name': 'Ken Jenkins', 'order': 9, 'profile_path': '/6w9OgC4nGUfYavC86a3FeeMgUJ4.jpg'}, {'cast_id': 73, 'character': 'Administrator', 'credit_id': '54aff37192514169e00044fb', 'gender': 0, 'id': 53303, 'name': 'Tony Abatemarco', 'order': 10, 'profile_path': '/pGobBL7YIG2sQDTEZZnOHTfgTSD.jpg'}, {'cast_id': 75, 'character': 'Car Salesman (uncredited)', 'credit_id': '56ad51ee9251417e14007463', 'gender': 0, 'id': 203950, 'name': 'Brian J. 
Williams', 'order': 11, 'profile_path': '/dYm5IuOx12977wMGXOiKpCpXuoD.jpg'}] [{'credit_id': '54afed0cc3a3683fba00443d', 'department': 'Sound', 'gender': 2, 'id': 3687, 'job': 'Sound Effects Editor', 'name': 'Harry Cohen', 'profile_path': None}, {'credit_id': '52fe46319251416c75070529', 'department': 'Production', 'gender': 1, 'id': 869, 'job': 'Producer', 'name': 'Gale Anne Hurd', 'profile_path': '/oRWGpXdIm2lu870aaKpgO3wy19n.jpg'}, {'credit_id': '54afeab79251414aee001cc2', 'department': 'Editing', 'gender': 2, 'id': 2033, 'job': 'Editor', 'name': 'Peter E. Berger', 'profile_path': None}, {'credit_id': '52fe46319251416c750704f3', 'department': 'Directing', 'gender': 2, 'id': 2388, 'job': 'Director', 'name': 'Jonathan Frakes', 'profile_path': '/51Z5EbZxtVPm6GiN0Wc9CLP4z3s.jpg'}, {'credit_id': '54afef2dc3a3684abf002913', 'department': 'Crew', 'gender': 0, 'id': 9347, 'job': 'Second Unit Cinematographer', 'name': 'Paul Hughen', 'profile_path': None}, {'credit_id': '569c1364c3a36858d2000b54', 'department': 'Camera', 'gender': 2, 'id': 17146, 'job': 'Director of Photography', 'name': 'Tim Suhrstedt', 'profile_path': None}, {'credit_id': '54afed38c3a368399e0046d8', 'department': 'Sound', 'gender': 0, 'id': 13179, 'job': 'Sound Effects Editor', 'name': 'Tony Lamberti', 'profile_path': '/jQTxQeV6n6zjRYDgyxiQzUx08Kq.jpg'}, {'credit_id': '54afeb70c3a3682e40001d5f', 'department': 'Art', 'gender': 0, 'id': 15328, 'job': 'Art Direction', 'name': 'Kevin Kavanaugh', 'profile_path': None}, {'credit_id': '54afed63c3a3682e40001d8f', 'department': 'Sound', 'gender': 0, 'id': 16736, 'job': 'Sound Designer', 'name': 'Lon Bender', 'profile_path': None}, {'credit_id': '52fe46319251416c75070535', 'department': 'Production', 'gender': 1, 'id': 19679, 'job': 'Casting', 'name': 'Mary Gail Artz', 'profile_path': None}, {'credit_id': '52fe46319251416c7507053b', 'department': 'Production', 'gender': 1, 'id': 19680, 'job': 'Casting', 'name': 'Barbara Cohen', 'profile_path': None}, {'credit_id': 
'52fe46319251416c7507054d', 'department': 'Costume & Make-Up', 'gender': 1, 'id': 22107, 'job': 'Costume Design', 'name': 'Deborah Everton', 'profile_path': None}, {'credit_id': '54afee31c3a368070f002bed', 'department': 'Visual Effects', 'gender': 0, 'id': 42264, 'job': 'Visual Effects Producer', 'name': 'Jacqueline M. Lopez', 'profile_path': None}, {'credit_id': '54afeb4f92514169e0004458', 'department': 'Art', 'gender': 0, 'id': 25748, 'job': 'Production Design', 'name': 'Marek Dobrowolski', 'profile_path': None}, {'credit_id': '54aff1169251414a3d001cb5', 'department': 'Crew', 'gender': 0, 'id': 54515, 'job': 'Choreographer', 'name': 'Hi-Hat', 'profile_path': '/9KIBPhKOxizRBH3YPLSSVaL2PYW.jpg'}, {'credit_id': '54afeacfc3a368070f002ba0', 'department': 'Editing', 'gender': 0, 'id': 59563, 'job': 'Editor', 'name': 'Jeff Canavan', 'profile_path': None}, {'credit_id': '52fe46319251416c7507052f', 'department': 'Production', 'gender': 1, 'id': 60268, 'job': 'Producer', 'name': 'Julia Pistor', 'profile_path': None}, {'credit_id': '54afeb819251414afd00205a', 'department': 'Art', 'gender': 1, 'id': 63673, 'job': 'Set Decoration', 'name': 'Brana Rosenfeld', 'profile_path': None}, {'credit_id': '52fe46319251416c750704f9', 'department': 'Writing', 'gender': 2, 'id': 64735, 'job': 'Writer', 'name': 'Rob Hedden', 'profile_path': None}, {'credit_id': '54afeeacc3a3682e1b001dd4', 'department': 'Crew', 'gender': 2, 'id': 81687, 'job': 'Stunt Coordinator', 'name': 'Rick Avery', 'profile_path': '/rXar2OrwkorJyQ9dRGf18zUEByW.jpg'}, {'credit_id': '52fe46319251416c750704ff', 'department': 'Writing', 'gender': 2, 'id': 90393, 'job': 'Writer', 'name': 'Andy Hedden', 'profile_path': None}, {'credit_id': '54afebc59251411ef60041f3', 'department': 'Costume & Make-Up', 'gender': 2, 'id': 579077, 'job': 'Makeup Department Head', 'name': 'Kenny Myers', 'profile_path': None}, {'credit_id': '54afef10c3a3682e40001db2', 'department': 'Camera', 'gender': 0, 'id': 582808, 'job': 'Still Photographer', 
'name': 'Claudette Barius', 'profile_path': None}, {'credit_id': '54afef8f9251414ae5001e20', 'department': 'Camera', 'gender': 2, 'id': 1108768, 'job': 'Camera Operator', 'name': 'David Gasperik', 'profile_path': None}, {'credit_id': '54aff0c6c3a3684abf00292f', 'department': 'Production', 'gender': 1, 'id': 1139092, 'job': 'Location Manager', 'name': 'Lisa Blok-Linson', 'profile_path': None}, {'credit_id': '52fe46319251416c75070547', 'department': 'Sound', 'gender': 0, 'id': 1206109, 'job': 'Music', 'name': 'Jamshed Sharifi', 'profile_path': None}, {'credit_id': '54afec319251414ae5001dd2', 'department': 'Art', 'gender': 1, 'id': 1246457, 'job': 'Assistant Art Director', 'name': 'Natalie Richards', 'profile_path': None}, {'credit_id': '54afebb3c3a3682e1b001da4', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1338670, 'job': 'Hairstylist', 'name': 'Joy Zapata', 'profile_path': None}, {'credit_id': '54aff14d925141152c004125', 'department': 'Crew', 'gender': 0, 'id': 1345633, 'job': 'Studio Teachers', 'name': 'Lucas Moore', 'profile_path': None}, {'credit_id': '54afec0ac3a368399e0046c2', 'department': 'Art', 'gender': 0, 'id': 1377122, 'job': 'Art Department Coordinator', 'name': 'Pam Cartmel', 'profile_path': None}, {'credit_id': '54afed249251411ef6004212', 'department': 'Sound', 'gender': 0, 'id': 1392081, 'job': 'Sound Effects Editor', 'name': 'Randy Kelley', 'profile_path': None}, {'credit_id': '54afeded9251414aee001d0d', 'department': 'Visual Effects', 'gender': 0, 'id': 1392906, 'job': 'Visual Effects Producer', 'name': 'Gayle Busby', 'profile_path': None}, {'credit_id': '54afefe89251414a3d001ca0', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1400085, 'job': 'Set Costumer', 'name': 'Steve Constancio', 'profile_path': None}, {'credit_id': '54afebf1c3a3684abf0028e5', 'department': 'Production', 'gender': 1, 'id': 1400543, 'job': 'Production Supervisor', 'name': 'Diane L. 
Sabatini', 'profile_path': None}, {'credit_id': '54afec579251411ef6004201', 'department': 'Art', 'gender': 0, 'id': 1403397, 'job': 'Construction Foreman', 'name': 'Randall S. Coe', 'profile_path': None}, {'credit_id': '54afed759251414afd002088', 'department': 'Sound', 'gender': 0, 'id': 1404841, 'job': 'ADR & Dubbing', 'name': 'Zack Davis', 'profile_path': None}, {'credit_id': '54afee50c3a3687744002673', 'department': 'Crew', 'gender': 0, 'id': 1406105, 'job': 'Visual Effects Editor', 'name': 'Steve Rhee', 'profile_path': None}, {'credit_id': '54afed4d9251414aee001cff', 'department': 'Sound', 'gender': 0, 'id': 1406389, 'job': 'Sound Effects Editor', 'name': 'Bruce Tanis', 'profile_path': None}, {'credit_id': '54afee83c3a3682e40001da4', 'department': 'Visual Effects', 'gender': 0, 'id': 1406922, 'job': 'Visual Effects Supervisor', 'name': 'Michael L. Fink', 'profile_path': None}, {'credit_id': '54afee09c3a368226e0022ca', 'department': 'Crew', 'gender': 0, 'id': 1407359, 'job': 'Visual Effects Art Director', 'name': 'Lubo Hristov', 'profile_path': None}, {'credit_id': '54afeced92514169e000447b', 'department': 'Sound', 'gender': 0, 'id': 1407812, 'job': 'Supervising Sound Editor', 'name': 'Geoffrey G. 
Rubay', 'profile_path': None}, {'credit_id': '54afeb9fc3a3683fba004427', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1411258, 'job': 'Hairstylist', 'name': 'Linda Arnold', 'profile_path': None}, {'credit_id': '54afebd89251413fe1001ff7', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1411259, 'job': 'Makeup Artist', 'name': 'Richard Snell', 'profile_path': None}, {'credit_id': '54afec449251414a3d001c59', 'department': 'Art', 'gender': 0, 'id': 1411260, 'job': 'Construction Coordinator', 'name': 'Rodney Armanino', 'profile_path': None}, {'credit_id': '54afec749251414afd00206f', 'department': 'Art', 'gender': 0, 'id': 1411261, 'job': 'Construction Foreman', 'name': 'Alan Allinger', 'profile_path': None}, {'credit_id': '54afec879251414ae5001dd8', 'department': 'Art', 'gender': 0, 'id': 1411262, 'job': 'Set Designer', 'name': 'Joel Ariniello', 'profile_path': None}, {'credit_id': '54afec9cc3a3682e40001d77', 'department': 'Crew', 'gender': 0, 'id': 1411263, 'job': 'Scenic Artist', 'name': 'Aprile Lanza Boettcher', 'profile_path': None}, {'credit_id': '54afecb4c3a368399e0046ce', 'department': 'Crew', 'gender': 2, 'id': 1411264, 'job': 'Property Master', 'name': 'Bill MacSems', 'profile_path': None}, {'credit_id': '54afed8b9251414a3d001c70', 'department': 'Editing', 'gender': 0, 'id': 1411265, 'job': 'Dialogue Editor', 'name': 'Vic Radulich', 'profile_path': None}, {'credit_id': '54afedadc3a3683fba004448', 'department': 'Crew', 'gender': 0, 'id': 1411266, 'job': 'Special Effects Coordinator', 'name': 'Richard O. 
Helmer', 'profile_path': None}, {'credit_id': '54afedbec3a3684abf002900', 'department': 'Crew', 'gender': 0, 'id': 1411267, 'job': 'Special Effects Coordinator', 'name': 'Gary Monak', 'profile_path': None}, {'credit_id': '54afedd2c3a3683fba004451', 'department': 'Crew', 'gender': 0, 'id': 1411268, 'job': 'Special Effects Coordinator', 'name': 'Lynn Garrido', 'profile_path': None}, {'credit_id': '54afeec3c3a3682e40001da7', 'department': 'Camera', 'gender': 0, 'id': 1411270, 'job': 'Camera Operator', 'name': 'Jeffrey Greeley', 'profile_path': None}, {'credit_id': '54afeedfc3a368399e0046fa', 'department': 'Camera', 'gender': 2, 'id': 1411271, 'job': 'Camera Operator', 'name': 'Michael J. Walker', 'profile_path': None}, {'credit_id': '54afeef3c3a368774400267f', 'department': 'Camera', 'gender': 0, 'id': 1411272, 'job': 'Steadicam Operator', 'name': 'Lawrence Karman', 'profile_path': None}, {'credit_id': '54afefcb925141152c004109', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1411273, 'job': 'Set Costumer', 'name': 'Maggie Bailey', 'profile_path': None}, {'credit_id': '54aff00ac3a368070f002c15', 'department': 'Crew', 'gender': 0, 'id': 1411275, 'job': 'Transportation Coordinator', 'name': 'Michael Menapace', 'profile_path': None}, {'credit_id': '54aff03b92514169e00044bd', 'department': 'Production', 'gender': 1, 'id': 1411276, 'job': 'Location Manager', 'name': 'Deborah J. 
Page', 'profile_path': None}, {'credit_id': '54aff059c3a3682e40001dc7', 'department': 'Directing', 'gender': 0, 'id': 1411277, 'job': 'Script Supervisor', 'name': 'Sue Meserve', 'profile_path': None}, {'credit_id': '54aff06cc3a368070f002c1e', 'department': 'Production', 'gender': 0, 'id': 1411278, 'job': 'Location Manager', 'name': 'Robert Brooks Mendel', 'profile_path': None}, {'credit_id': '54aff0abc3a3683fba004486', 'department': 'Directing', 'gender': 1, 'id': 1411279, 'job': 'Script Supervisor', 'name': 'Judi Brown', 'profile_path': None}, {'credit_id': '54aff1629251414aee001d48', 'department': 'Crew', 'gender': 0, 'id': 1411281, 'job': 'Unit Publicist', 'name': 'David Mortimer', 'profile_path': None}] 15028 +40287 [{'cast_id': 1001, 'character': 'Max.', 'credit_id': '52fe49fb9251416c750d9d71', 'gender': 2, 'id': 1016035, 'name': 'Max Linder', 'order': 0, 'profile_path': '/4LCfa2kvTT252mrivhbyL2hyWEO.jpg'}, {'cast_id': 1002, 'character': "Betty - Max's Fiancée", 'credit_id': '52fe49fb9251416c750d9d75', 'gender': 0, 'id': 1109645, 'name': 'Alta Allen', 'order': 1, 'profile_path': '/vTmN7Fmag3pMSmm4a750hbiwPQM.jpg'}, {'cast_id': 1003, 'character': "John - Max's Valet", 'credit_id': '52fe49fb9251416c750d9d79', 'gender': 0, 'id': 1109646, 'name': 'Ralph McCullough', 'order': 2, 'profile_path': None}, {'cast_id': 1004, 'character': "Mary - Max's Maid", 'credit_id': '52fe49fb9251416c750d9d7d', 'gender': 0, 'id': 1109647, 'name': 'Betty K. Peterson', 'order': 3, 'profile_path': None}, {'cast_id': 1005, 'character': "Max's False Friend", 'credit_id': '52fe49fb9251416c750d9d81', 'gender': 0, 'id': 1109648, 'name': 'F.B. 
Crayne', 'order': 4, 'profile_path': None}, {'cast_id': 1006, 'character': 'The Railroad Conductor', 'credit_id': '52fe49fb9251416c750d9d85', 'gender': 0, 'id': 1109649, 'name': 'Chance Ward', 'order': 5, 'profile_path': None}, {'cast_id': 1007, 'character': 'A Station Agent', 'credit_id': '52fe49fb9251416c750d9d89', 'gender': 0, 'id': 1109650, 'name': 'Hugh Saxon', 'order': 6, 'profile_path': None}, {'cast_id': 1008, 'character': "Station Agent's Daughter", 'credit_id': '52fe49fb9251416c750d9d8d', 'gender': 0, 'id': 1109651, 'name': 'Thelma Percy', 'order': 7, 'profile_path': None}, {'cast_id': 1009, 'character': 'A Jail Bird', 'credit_id': '52fe49fb9251416c750d9d91', 'gender': 0, 'id': 1109652, 'name': 'C.E. Anderson', 'order': 8, 'profile_path': None}, {'cast_id': 1011, 'character': "Betty's Hawaiian Maid", 'credit_id': '52fe49fb9251416c750d9d99', 'gender': 0, 'id': 1109654, 'name': 'Lola Gonzales', 'order': 10, 'profile_path': None}, {'cast_id': 1012, 'character': "Max's Chef - in Mirror Gag", 'credit_id': '52fe49fc9251416c750d9d9d', 'gender': 0, 'id': 1109655, 'name': 'Harry Mann', 'order': 11, 'profile_path': None}, {'cast_id': 1013, 'character': 'The Chimpanzee', 'credit_id': '52fe49fc9251416c750d9da1', 'gender': 0, 'id': 1109656, 'name': 'Joe Martin', 'order': 12, 'profile_path': None}] [{'credit_id': '55140dfa92514103e9002d99', 'department': 'Camera', 'gender': 2, 'id': 14478, 'job': 'Director of Photography', 'name': 'Charles Van Enger', 'profile_path': None}, {'credit_id': '52fe49fb9251416c750d9d6d', 'department': 'Directing', 'gender': 2, 'id': 1016035, 'job': 'Director', 'name': 'Max Linder', 'profile_path': '/4LCfa2kvTT252mrivhbyL2hyWEO.jpg'}, {'credit_id': '54635d490e0a26780d002119', 'department': 'Production', 'gender': 2, 'id': 1016035, 'job': 'Producer', 'name': 'Max Linder', 'profile_path': '/4LCfa2kvTT252mrivhbyL2hyWEO.jpg'}, {'credit_id': '54635d3bc3a368124a0020bd', 'department': 'Writing', 'gender': 2, 'id': 1016035, 'job': 'Writer', 'name': 
'Max Linder', 'profile_path': '/4LCfa2kvTT252mrivhbyL2hyWEO.jpg'}] 97995 +44831 [{'cast_id': 1, 'character': 'Ash Ketchum', 'credit_id': '52fe43de9251416c750213f1', 'gender': 1, 'id': 67830, 'name': 'Veronica Taylor', 'order': 0, 'profile_path': '/28EFUb3bPWJaWYzZIxurGCrDpHk.jpg'}, {'cast_id': 3, 'character': 'Misty / Jessie / Wigglytuff', 'credit_id': '52fe43de9251416c750213f5', 'gender': 1, 'id': 67832, 'name': 'Rachael Lillis', 'order': 2, 'profile_path': None}, {'cast_id': 4, 'character': 'Brock / James / Squirtle', 'credit_id': '52fe43de9251416c750213f9', 'gender': 2, 'id': 67833, 'name': 'Eric Stuart', 'order': 3, 'profile_path': '/3aTpgbntEzsaPCfVFY5ZddZxqBR.jpg'}] [{'credit_id': '52fe43de9251416c750213ff', 'department': 'Directing', 'gender': 2, 'id': 65428, 'job': 'Director', 'name': 'Michael Haigney', 'profile_path': None}, {'credit_id': '52fe43de9251416c75021405', 'department': 'Writing', 'gender': 0, 'id': 65429, 'job': 'Screenplay', 'name': 'Kunihiko Yuyama', 'profile_path': '/9z3LqqCMRZWyptfIZnlAdG7CnaZ.jpg'}, {'credit_id': '52fe43de9251416c7502140b', 'department': 'Directing', 'gender': 0, 'id': 65429, 'job': 'Director', 'name': 'Kunihiko Yuyama', 'profile_path': '/9z3LqqCMRZWyptfIZnlAdG7CnaZ.jpg'}, {'credit_id': '52fe43de9251416c75021411', 'department': 'Writing', 'gender': 0, 'id': 65430, 'job': 'Screenplay', 'name': 'Takeshi Shudo', 'profile_path': None}] 10991 +44836 [{'cast_id': 1, 'character': 'Ash Ketchum (voice)', 'credit_id': '52fe45049251416c75048de1', 'gender': 1, 'id': 67830, 'name': 'Veronica Taylor', 'order': 0, 'profile_path': '/28EFUb3bPWJaWYzZIxurGCrDpHk.jpg'}, {'cast_id': 2, 'character': 'Misty (voice)', 'credit_id': '52fe45049251416c75048de5', 'gender': 1, 'id': 67832, 'name': 'Rachael Lillis', 'order': 1, 'profile_path': None}, {'cast_id': 3, 'character': 'Meowth (voice)', 'credit_id': '52fe45049251416c75048de9', 'gender': 1, 'id': 73043, 'name': 'Maddie Blaustein', 'order': 2, 'profile_path': '/UJ5MREG89nf7XPjzhm2jzbnqeA.jpg'}, 
{'cast_id': 4, 'character': 'Pikachu (voice)', 'credit_id': '52fe45049251416c75048ded', 'gender': 1, 'id': 73044, 'name': 'Ikue Ōtani', 'order': 3, 'profile_path': '/1RJeAkOY4aA6NgbzQeXccaRj6I3.jpg'}] [{'credit_id': '52fe45049251416c75048e35', 'department': 'Camera', 'gender': 0, 'id': 12186, 'job': 'Director of Photography', 'name': 'Hisao Shirai', 'profile_path': None}, {'credit_id': '52fe45049251416c75048df9', 'department': 'Directing', 'gender': 0, 'id': 65429, 'job': 'Director', 'name': 'Kunihiko Yuyama', 'profile_path': '/9z3LqqCMRZWyptfIZnlAdG7CnaZ.jpg'}, {'credit_id': '52fe45049251416c75048e29', 'department': 'Production', 'gender': 0, 'id': 65431, 'job': 'Producer', 'name': 'Choji Yoshikawa', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e05', 'department': 'Production', 'gender': 2, 'id': 67943, 'job': 'Executive Producer', 'name': 'Norman J. Grossfeld', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e0b', 'department': 'Production', 'gender': 0, 'id': 67944, 'job': 'Executive Producer', 'name': 'Alfred R. 
Kahn', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e11', 'department': 'Production', 'gender': 0, 'id': 73045, 'job': 'Executive Producer', 'name': 'Takashi Kawaguchi', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e17', 'department': 'Production', 'gender': 0, 'id': 73046, 'job': 'Executive Producer', 'name': 'Masakazu Kubo', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e1d', 'department': 'Production', 'gender': 0, 'id': 73047, 'job': 'Producer', 'name': 'Yukako Matsusako', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e23', 'department': 'Production', 'gender': 0, 'id': 73048, 'job': 'Producer', 'name': 'Takemoto Mori', 'profile_path': None}, {'credit_id': '52fe45049251416c75048df3', 'department': 'Directing', 'gender': 2, 'id': 73051, 'job': 'Director', 'name': 'Jim Malone', 'profile_path': None}, {'credit_id': '52fe45049251416c75048dff', 'department': 'Writing', 'gender': 2, 'id': 25761, 'job': 'Screenplay', 'name': 'Hideki Sonoda', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e2f', 'department': 'Sound', 'gender': 0, 'id': 73052, 'job': 'Original Music Composer', 'name': 'Shinji Miyazaki', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e3b', 'department': 'Editing', 'gender': 0, 'id': 73053, 'job': 'Editor', 'name': 'Yumiko Fuse', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e41', 'department': 'Editing', 'gender': 0, 'id': 73054, 'job': 'Editor', 'name': 'Toshio Henmi', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e47', 'department': 'Editing', 'gender': 0, 'id': 73055, 'job': 'Editor', 'name': 'Yutaka Henmi', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e4d', 'department': 'Editing', 'gender': 0, 'id': 73056, 'job': 'Editor', 'name': 'Yutaka Ita', 'profile_path': None}, {'credit_id': '52fe45049251416c75048e53', 'department': 'Editing', 'gender': 0, 'id': 73057, 'job': 'Editor', 'name': 'Yukiko Nojiri', 'profile_path': None}] 12600 
+45275 [{'cast_id': 2, 'character': 'Ebba', 'credit_id': '534fd1f00e0a267eab000dd5', 'gender': 1, 'id': 1310711, 'name': 'Lisa Loven Kongsli', 'order': 1, 'profile_path': '/pwT0UuQ1tsleh3urfabVRj0UE3S.jpg'}, {'cast_id': 3, 'character': 'Tomas', 'credit_id': '534fd2140e0a267eb6000e3a', 'gender': 2, 'id': 116614, 'name': 'Johannes Bah Kuhnke', 'order': 2, 'profile_path': '/jUd1Sa6VwcJxGsuuxCbS4QR7pot.jpg'}, {'cast_id': 4, 'character': 'Vera', 'credit_id': '534fd2390e0a267ea8000e89', 'gender': 0, 'id': 1310713, 'name': 'Clara Wettergren', 'order': 3, 'profile_path': None}, {'cast_id': 5, 'character': 'Harry', 'credit_id': '534fd2490e0a267ea1000ed8', 'gender': 0, 'id': 1310714, 'name': 'Vincent Wettergren', 'order': 4, 'profile_path': None}, {'cast_id': 6, 'character': 'Brady', 'credit_id': '543c0752c3a3681990005db1', 'gender': 2, 'id': 55493, 'name': 'Brady Corbet', 'order': 5, 'profile_path': '/cnlvcYJP4UyeKdbCCuTKvFkjWSH.jpg'}, {'cast_id': 7, 'character': 'Mats', 'credit_id': '54af17919251413fe1000473', 'gender': 2, 'id': 571418, 'name': 'Kristofer Hivju', 'order': 6, 'profile_path': '/qlGV5b8FMx2Ut1fgmm6TDc1fHxC.jpg'}, {'cast_id': 8, 'character': 'Fanni', 'credit_id': '54af179fc3a3683fba002768', 'gender': 0, 'id': 1410873, 'name': 'Fanni Metelius', 'order': 7, 'profile_path': None}, {'cast_id': 9, 'character': 'Charlotte', 'credit_id': '54af17ac925141152c002647', 'gender': 0, 'id': 1410874, 'name': 'Karin Myrenberg', 'order': 8, 'profile_path': None}, {'cast_id': 10, 'character': 'Cleaner', 'credit_id': '54af17be9251411ef60029dd', 'gender': 0, 'id': 1410875, 'name': 'Johannes Moustos', 'order': 9, 'profile_path': None}] [{'credit_id': '534fd1a80e0a267eb6000e32', 'department': 'Directing', 'gender': 2, 'id': 56370, 'job': 'Director', 'name': 'Ruben Östlund', 'profile_path': '/nWNVq8U9u1EWFR1Eg85Z14BPGZu.jpg'}, {'credit_id': '534fd1d30e0a267ea1000eca', 'department': 'Writing', 'gender': 2, 'id': 56370, 'job': 'Writer', 'name': 'Ruben Östlund', 'profile_path': 
'/nWNVq8U9u1EWFR1Eg85Z14BPGZu.jpg'}, {'credit_id': '54af17d19251414aee000465', 'department': 'Production', 'gender': 2, 'id': 18824, 'job': 'Producer', 'name': 'Philippe Bober', 'profile_path': None}, {'credit_id': '54af17ddc3a3683fba002770', 'department': 'Production', 'gender': 0, 'id': 932556, 'job': 'Producer', 'name': 'Erik Hemmendorff', 'profile_path': None}, {'credit_id': '54af17f0c3a368226e0004e5', 'department': 'Production', 'gender': 0, 'id': 1410876, 'job': 'Producer', 'name': 'Marie Kjellson', 'profile_path': None}, {'credit_id': '54af17fcc3a368399e002b91', 'department': 'Production', 'gender': 0, 'id': 1150129, 'job': 'Co-Producer', 'name': 'Katja Adomeit', 'profile_path': None}, {'credit_id': '54af18099251414ae5000451', 'department': 'Production', 'gender': 0, 'id': 1410877, 'job': 'Co-Producer', 'name': 'Marina Perales', 'profile_path': None}, {'credit_id': '54af18159251413fe1000485', 'department': 'Production', 'gender': 2, 'id': 79970, 'job': 'Co-Producer', 'name': 'Yngve Sæther', 'profile_path': '/in2EiaHs7uFfbev4W3co1oDZnvT.jpg'}, {'credit_id': '54af18239251411ef60029e8', 'department': 'Sound', 'gender': 0, 'id': 71620, 'job': 'Original Music Composer', 'name': 'Ola Fløttum', 'profile_path': None}, {'credit_id': '54af182e9251413fe1000489', 'department': 'Camera', 'gender': 0, 'id': 111353, 'job': 'Director of Photography', 'name': 'Fredrik Wenzel', 'profile_path': None}, {'credit_id': '54af183d9251411ef60029eb', 'department': 'Editing', 'gender': 0, 'id': 1154242, 'job': 'Editor', 'name': 'Jacob Secher Schulsinger', 'profile_path': None}, {'credit_id': '54af18499251414a3d000441', 'department': 'Production', 'gender': 0, 'id': 1410878, 'job': 'Casting', 'name': 'Katja Wik', 'profile_path': None}, {'credit_id': '54af185592514169e0002b00', 'department': 'Art', 'gender': 0, 'id': 1156989, 'job': 'Production Design', 'name': 'Josefin Åsberg', 'profile_path': None}, {'credit_id': '54af1862c3a3682e400004a7', 'department': 'Art', 'gender': 0, 'id': 
1156989, 'job': 'Art Direction', 'name': 'Josefin Åsberg', 'profile_path': None}, {'credit_id': '54af186fc3a3684abf001002', 'department': 'Costume & Make-Up', 'gender': 0, 'id': 1410879, 'job': 'Costume Design', 'name': 'Pia Aleborg', 'profile_path': None}] 265189 + +id duplicate rows for id: 87 +duplicate index numbers (id, showing up to 200): [676, 838, 949, 1465, 2564, 4114, 4356, 5130, 5535, 5710, 5865, 7345, 8068, 9165, 9327, 9576, 10419, 11155, 11342, 12066, 13220, 13261, 13375, 13603, 13946, 14000, 14012, 15074, 15702, 15765, 16167, 16764, 17229, 19889, 19924, 20842, 20898, 21115, 21164, 21853, 22150, 23043, 23533, 24163, 24843, 25884, 25885, 25886, 25887, 25888, 25889, 25890, 25891, 25892, 25893, 25894, 25895, 25896, 25897, 25949, 25950, 25953, 25954, 25955, 25956, 25957, 25965, 25966, 25967, 25968, 25969, 25974, 25975, 26638, 28873, 29387, 30013, 33196, 33755, 33838, 35809, 38882, 40051, 40287, 44831, 44836, 45275] + id +25885 3057 +25950 3057 +33838 4912 +5865 4912 +9165 5511 +7345 5511 +25895 8767 +25969 8767 +25893 9755 +25967 9755 +44831 10991 +4114 10991 +14012 11115 +24843 11115 +25890 11752 +25957 11752 +44836 12600 +5535 12600 +11342 13209 +15765 13209 +12066 14788 +10419 14788 +5130 15028 +33755 15028 +22150 18440 +14000 18440 +15074 22649 +949 22649 +8068 23305 +9327 23305 +25888 24023 +25955 24023 +25956 24026 +25889 24026 +17229 25541 +23043 25541 +15702 42495 +29387 42495 +25896 43629 +25974 43629 +9576 69234 +26638 69234 +20842 77221 +11155 77221 +21115 84198 +2564 84198 +40287 97995 +13946 97995 +16167 99080 +38882 99080 +1465 105045 +676 105045 +20898 109962 +5710 109962 +23533 110428 +4356 110428 +25884 116723 +25949 116723 +19889 119916 +21164 119916 +25968 123634 +25894 123634 +25953 125458 +25886 125458 +30013 132641 +838 132641 +16764 141971 +13261 141971 +13375 141971 +25891 142563 +25965 142563 +21853 152795 +19924 152795 +25892 157301 +25966 157301 +13220 159849 +35809 159849 +28873 168538 +13603 168538 +25975 187156 +25897 187156 
+25954 199591 +25887 199591 +24163 265189 +45275 265189 +33196 298721 +40051 298721 +CLEANED: /Users/nicky/Projects/CentralLibraryData/credits.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/credits.csv (dupes=37, nulls=0) +validation passed: credits.csv diff --git a/export_java_class_data.py b/export_java_class_data.py new file mode 100644 index 0000000..7c219aa --- /dev/null +++ b/export_java_class_data.py @@ -0,0 +1,455 @@ +from __future__ import annotations + +import argparse +import ast +import csv +import logging +import re +from datetime import datetime +from pathlib import Path + +import pandas as pd + + +ROOT = Path(__file__).resolve().parent +OUTPUT_DIR = ROOT / "java_exports" + +NULL_TOKENS = {"", "null", "none", "na", "n/a", "nan", ""} + +LOGGER = logging.getLogger("export_java") + +DATE_PATTERNS = ( + "%Y/%m/%d", + "%d/%m/%Y", + "%m/%d/%Y", + "%d-%m-%Y", + "%m-%d-%Y", + "%d %b %Y", + "%d %B %Y", + "%b %d, %Y", + "%B %d, %Y", +) + + +def normalize_text(value: object) -> str: + if value is None: + return "" + text = str(value).strip() + text = re.sub(r"\s+", " ", text) + if text.lower() in NULL_TOKENS: + return "" + return text + + +def configure_logging(verbose: bool = False) -> None: + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig(level=level, format="%(asctime)s %(levelname)s %(message)s") + + +def standardize_date(value: object) -> str: + text = normalize_text(value) + if not text: + return "" + if re.fullmatch(r"\d{4}-\d{2}-\d{2}", text): + return text + for pattern in DATE_PATTERNS: + try: + return datetime.strptime(text, pattern).date().isoformat() + except ValueError: + continue + try: + normalized = text.replace("Z", "+00:00") + return datetime.fromisoformat(normalized).date().isoformat() + except ValueError: + return "" + + +def assert_numeric_column(df: pd.DataFrame, column: str, dataset_name: str) -> None: + if column not in df.columns: + raise ValueError(f"{dataset_name} missing expected numeric 
column: {column}") + non_empty = df[column].map(normalize_text) + non_empty = non_empty[non_empty != ""] + if int(pd.to_numeric(non_empty, errors="coerce").isna().sum()) > 0: + raise ValueError(f"{dataset_name} contains non-numeric values in {column}") + + +def verify_row_count(dataset_name: str, input_rows: int, output_rows: int) -> None: + if output_rows <= 0: + LOGGER.warning("%s output has no rows after filtering", dataset_name) + elif output_rows > input_rows: + raise ValueError(f"{dataset_name} output rows exceed input rows ({output_rows} > {input_rows})") + else: + LOGGER.info("%s row count check input=%s output=%s", dataset_name, input_rows, output_rows) + + +def clean_frame(df: pd.DataFrame, required_columns: list[str]) -> pd.DataFrame: + cleaned = df.copy() + + for column in cleaned.columns: + cleaned[column] = cleaned[column].map(normalize_text) + + for column in required_columns: + cleaned = cleaned[cleaned[column] != ""] + + cleaned = cleaned.drop_duplicates().reset_index(drop=True) + + # Write a literal placeholder for optional blanks in exported CSVs. 
+ cleaned = cleaned.replace("", "empty") + + return cleaned + + +def require_columns(df: pd.DataFrame, required_columns: list[str], dataset_name: str) -> None: + missing = [column for column in required_columns if column not in df.columns] + if missing: + raise ValueError(f"{dataset_name} missing required columns: {missing}") + + +def enforce_unique_ids(df: pd.DataFrame, dataset_name: str, id_column: str = "ID") -> pd.DataFrame: + if id_column not in df.columns: + raise ValueError(f"{dataset_name} missing required id column: {id_column}") + duplicate_count = int(df[id_column].duplicated().sum()) + if duplicate_count: + raise ValueError(f"{dataset_name} has duplicate IDs in {id_column}: {duplicate_count}") + return df + + +def assert_files_exist(paths: list[Path]) -> None: + missing = [str(path) for path in paths if not path.exists()] + if missing: + raise FileNotFoundError(f"Missing required input files: {missing}") + + +def resolve_input_path(data_root: Path, cleaned_root: Path, filename: str, prefer_cleaned: bool = False) -> Path: + candidates = ( + [cleaned_root / filename, data_root / filename] + if prefer_cleaned + else [data_root / filename, cleaned_root / filename] + ) + return next((p for p in candidates if p.exists()), candidates[0]) + + +def resolve_first_existing(candidates: list[Path]) -> Path: + for candidate in candidates: + if candidate.exists(): + return candidate + return candidates[0] + + +def read_csv_as_strings(path: Path, skip_bad_lines: bool = False) -> pd.DataFrame: + read_kwargs: dict[str, object] = { + "dtype": "string", + "keep_default_na": False, + } + if skip_bad_lines: + read_kwargs.update({"engine": "python", "on_bad_lines": "skip"}) + return pd.read_csv(path, **read_kwargs) + + +def dedupe_by_id(df: pd.DataFrame, id_column: str) -> pd.DataFrame: + ranked = df.copy() + non_id_columns = [column for column in ranked.columns if column != id_column] + ranked["_completeness"] = ranked[non_id_columns].apply( + lambda row: sum(1 for value in 
row if normalize_text(value)), + axis=1, + ) + ranked = ranked.sort_values(by=[id_column, "_completeness"], ascending=[True, False], kind="mergesort") + ranked = ranked.drop_duplicates(subset=[id_column], keep="first") + return ranked.drop(columns=["_completeness"]).reset_index(drop=True) + + +def read_periodical_issues(path: Path) -> pd.DataFrame: + rows: list[dict[str, str]] = [] + + with path.open("r", encoding="utf-8-sig", newline="") as handle: + reader = csv.reader(handle) + header = next(reader) + + for row in reader: + if not row: + continue + if len(row) < len(header): + row = row + [""] * (len(header) - len(row)) + elif len(row) > len(header): + row = row[:2] + [",".join(row[2:-5]), *row[-5:]] + rows.append(dict(zip(header, row, strict=False))) + + return pd.DataFrame(rows, columns=header, dtype="string") + + +def extract_first_present(*values: object) -> str: + for value in values: + text = normalize_text(value) + if text: + return text + return "" + + +def build_books(data_root: Path, cleaned_root: Path) -> pd.DataFrame: + books_path = resolve_input_path(data_root, cleaned_root, "pg_catalog.csv", prefer_cleaned=True) + books = read_csv_as_strings(books_path, skip_bad_lines=True) + require_columns(books, ["Text#", "Title", "Authors"], "pg_catalog.csv") + text_number = pd.to_numeric(books["Text#"], errors="coerce") + invalid_id_count = int(text_number.isna().sum()) + if invalid_id_count: + LOGGER.warning("pg_catalog.csv has %s rows with non-numeric Text#; they will be dropped", invalid_id_count) + LOGGER.info("books input rows=%s source=%s", len(books), books_path) + input_rows = len(books) + output = pd.DataFrame( + { + "Id": books["Text#"], + "Title": books["Title"], + "Location": "", + "Author": books["Authors"], + "ISBN": "", + "Pages": "", + "Genre": books.apply( + lambda row: extract_first_present(row.get("Bookshelves"), row.get("Subjects")), + axis=1, + ), + } + ) + output = clean_frame(output, ["Id", "Title"]) + output = 
output[output["Id"].str.fullmatch(r"\d+") == True] + output = dedupe_by_id(output, "Id") + output = enforce_unique_ids(output, "Book", "Id") + verify_row_count("Book", input_rows, len(output)) + LOGGER.info("books output rows=%s", len(output)) + return output + + +def parse_volume_issue(description: object) -> tuple[str, str]: + text = normalize_text(description) + if not text: + return "", "" + + volume_match = re.search(r"volume\s*([0-9A-Za-z.-]+)", text, flags=re.IGNORECASE) + issue_match = re.search(r"(?:number|no\.?|issue)\s*([0-9A-Za-z.-]+)", text, flags=re.IGNORECASE) + + volume = volume_match.group(1) if volume_match else "" + issue_number = issue_match.group(1) if issue_match else "" + return normalize_text(volume), normalize_text(issue_number) + + +def build_periodicals(data_root: Path, cleaned_root: Path) -> pd.DataFrame: + titles_path = resolve_input_path(data_root, cleaned_root, "periodical-titles.csv", prefer_cleaned=True) + issues_path = resolve_input_path(data_root, cleaned_root, "periodical-issues.csv", prefer_cleaned=True) + titles = read_csv_as_strings(titles_path, skip_bad_lines=True).rename( + columns={"id": "title_id_ref", "title": "periodical_title"} + ) + issues = read_periodical_issues(issues_path).rename( + columns={"id": "issue_id", "title": "issue_title", "description": "issue_description", "date": "issue_date"} + ) + require_columns(titles, ["title_id_ref", "periodical_title", "publisher", "place", "issn"], "periodical-titles.csv") + require_columns(issues, ["issue_id", "issue_title", "issue_description", "issue_date", "title_id"], "periodical-issues.csv") + LOGGER.info("periodicals input rows titles=%s issues=%s", len(titles), len(issues)) + input_rows = len(issues) + + orphan_mask = ~issues["title_id"].isin(titles["title_id_ref"]) + orphan_count = int(orphan_mask.sum()) + if orphan_count: + LOGGER.warning("periodicals: %s issues have unmatched title_id (orphan FK)", orphan_count) + + merged = issues.merge( + titles[["title_id_ref", 
"periodical_title", "publisher", "place", "issn"]], + left_on="title_id", + right_on="title_id_ref", + how="left", + ) + + volume_issue = merged["issue_description"].map(parse_volume_issue) + output = pd.DataFrame( + { + "Id": merged["issue_id"], + "Title": merged["periodical_title"].fillna(merged["issue_title"]), + "Location": merged["place"], + "Publisher": merged["publisher"], + "ISSN": merged["issn"], + "Volume": [item[0] for item in volume_issue], + "Issue #": [item[1] for item in volume_issue], + "Publication date": merged["issue_date"].map(standardize_date), + } + ) + output = clean_frame(output, ["Id", "Title"]) + output = dedupe_by_id(output, "Id") + output = enforce_unique_ids(output, "Periodical", "Id") + verify_row_count("Periodical", input_rows, len(output)) + LOGGER.info("periodicals stage rows output=%s", len(output)) + return output + + +def build_music(data_root: Path, cleaned_root: Path) -> pd.DataFrame: + music_path = resolve_input_path(data_root, cleaned_root, "tcc_ceds_music.csv", prefer_cleaned=True) + music = read_csv_as_strings(music_path) + require_columns(music, ["artist_name", "track_name", "release_date", "genre", "lyrics", "len"], "tcc_ceds_music.csv") + assert_numeric_column(music, music.columns[0], "tcc_ceds_music.csv") + LOGGER.info("music input rows=%s source=%s", len(music), music_path) + input_rows = len(music) + id_column = music.columns[0] + output = pd.DataFrame( + { + "ID": music[id_column], + "Title": music["track_name"], + "Location": "", + "Artist": music["artist_name"], + "Date": music["release_date"].map(standardize_date), + "Genre": music["genre"], + "Lyrics": music["lyrics"], + "Length": music["len"], + } + ) + output = clean_frame(output, ["ID", "Title", "Artist"]) + output = dedupe_by_id(output, "ID") + output = enforce_unique_ids(output, "Music", "ID") + verify_row_count("Music", input_rows, len(output)) + LOGGER.info("music stage rows output=%s", len(output)) + return output + + +def parse_director_by_tmdb_id(path: 
Path) -> dict[str, str]: + directors: dict[str, str] = {} + + with path.open("r", encoding="utf-8-sig", newline="") as handle: + reader = csv.DictReader(handle) + for row in reader: + tmdb_id = normalize_text(row.get("id")) + crew_text = normalize_text(row.get("crew")) + if not tmdb_id or not crew_text: + continue + + try: + crew_items = ast.literal_eval(crew_text) + except (SyntaxError, ValueError): + continue + + if not isinstance(crew_items, list): + continue + + director_name = "" + for item in crew_items: + if not isinstance(item, dict): + continue + job = normalize_text(item.get("job")) + department = normalize_text(item.get("department")) + if job.lower() == "director" or department.lower() == "directing": + director_name = normalize_text(item.get("name")) + if director_name: + break + + directors[tmdb_id] = director_name + + return directors + + +def build_dvds(data_root: Path, cleaned_root: Path) -> pd.DataFrame: + links_path = resolve_input_path(data_root, cleaned_root, "links.csv", prefer_cleaned=True) + ratings_path = resolve_first_existing( + [ + resolve_input_path(data_root, cleaned_root, "ratings.csv", prefer_cleaned=True), + resolve_input_path(data_root, cleaned_root, "ratings_small.csv", prefer_cleaned=True), + ] + ) + credits_path = resolve_input_path(data_root, cleaned_root, "credits.csv", prefer_cleaned=True) + links = read_csv_as_strings(links_path) + require_columns(links, ["movieId", "tmdbId"], "links.csv") + assert_numeric_column(links, "movieId", "links.csv") + ratings = pd.read_csv(ratings_path, usecols=["movieId", "rating"]) + if pd.to_numeric(ratings["rating"], errors="coerce").isna().any(): + raise ValueError("ratings.csv contains non-numeric values in rating") + LOGGER.info("dvd input rows links=%s ratings=%s", len(links), len(ratings)) + input_rows = len(links) + avg_rating = ( + ratings.groupby("movieId", dropna=False)["rating"] + .mean() + .round(2) + .astype(str) + .to_dict() + ) + directors = 
def parse_args() -> argparse.Namespace:
    """Parse the exporter's command-line flags from ``sys.argv``.

    Flags:
        --sample        switch the run to the bundled sample data.
        --sample-group  which sample set to use together with ``--sample``
                        (defaults to ``valid_records``).
        --verbose       turn on debug logging.

    Returns:
        The populated ``argparse.Namespace`` with ``sample``,
        ``sample_group`` and ``verbose`` attributes.
    """
    sample_groups = ["valid_records", "invalid_records", "edge_case_records"]

    cli = argparse.ArgumentParser(
        description="Export class-specific CSV files for Java integration."
    )
    cli.add_argument(
        "--sample",
        action="store_true",
        help="Run using sample data in sample_data_java.",
    )
    cli.add_argument(
        "--sample-group",
        choices=sample_groups,
        default="valid_records",
        help="Sample dataset group to use with --sample.",
    )
    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug logging.",
    )

    namespace = cli.parse_args()
    return namespace
"Isabella", "Mason", + "Mia", "Ethan", "Charlotte", "James", "Amelia", "Benjamin", "Harper", "Logan", "Evelyn", "Alexander", + "Abigail", "Henry", "Ella", "Michael", "Scarlett", "Daniel", "Grace", "Jackson", "Chloe", "Sebastian", +] + +LAST_NAMES = [ + "Anderson", "Bennett", "Carter", "Donovan", "Ellis", "Fletcher", "Garcia", "Hughes", "Iverson", "Jensen", + "Kensington", "Lawson", "Montgomery", "Nolan", "Owens", "Prescott", "Quincy", "Ramirez", "Sinclair", "Turner", + "Underwood", "Vasquez", "Whitaker", "Xu", "Young", "Zimmerman", "Hawthorne", "Blackwell", "Dawson", "Marlowe", +] + +MOVIE_ADJECTIVES = [ + "Silent", "Hidden", "Golden", "Broken", "Midnight", "Crimson", "Fading", "Last", "Rising", "Forgotten", + "Electric", "Velvet", "Iron", "Wandering", "Burning", "Shattered", "Secret", "Lonely", "Radiant", "Final", +] + +MOVIE_NOUNS = [ + "Harbor", "Empire", "Echo", "Promise", "Horizon", "Garden", "Signal", "River", "City", "Letter", + "Voyage", "Kingdom", "Trial", "Shadow", "Paradox", "Summit", "Mirage", "Chronicle", "Frontier", "Lantern", +] + +PERIODICAL_DESCRIPTORS = [ + "Business", "Policy", "Science", "Health", "Technology", "Arts", "Culture", "Global", "Economic", "Legal", + "Education", "Literary", "Environmental", "Medical", "Industry", "Public Affairs", "Research", "Finance", "Innovation", "Civic", +] + +PERIODICAL_FORMATS = ["Review", "Journal", "Quarterly", "Digest", "Chronicle", "Bulletin", "Observer", "Forum", "Report", "Times"] + +BOOK_PATTERNS = [ + "The {adj} {noun}", + "{noun} of the {adj} Era", + "A {adj} {noun}", +] + +DVD_PATTERNS = [ + "The {adj} {noun}", + "{adj} {noun}", + "{noun} at Midnight", +] + +MUSIC_PATTERNS = [ + "{adj} {noun}", + "{noun} in the {adj} Light", + "{adj} Hearts, {noun} Nights", +] + +BOOK_NOUNS = [ + "Atlas", "Chronicles", "Memoir", "Ledger", "Notebook", "Companion", "Guide", "Archive", "Record", "Anthology", +] + +PUBLISHER_PREFIXES = [ + "Northbridge", "Redwood", "Harbor", "Summit", "Stonefield", "Bluebird", "Maple", 
"Riverside", "Crescent", "Elmwood", + "Granite", "Pinecrest", "Westgate", "Hillside", "Broadview", "Lakeshore", "Brighton", "Ironwood", "Fairmont", "Crown", +] + +PUBLISHER_SUFFIXES = ["Press", "Publishing", "Media Group", "House", "Journals", "Publications", "Review", "Books", "Editions", "Works"] + +LOCATIONS = [ + "Main Library - Floor 1", "Main Library - Floor 2", "Downtown Branch", "Riverside Branch", "North Campus Library", + "South Campus Library", "Reference Wing", "Archives Room", "Media Center", "Periodicals Desk", "Stacks A", "Stacks B", +] + +MUSIC_GENRES = ["Pop", "Rock", "Jazz", "Hip-Hop", "Folk", "Classical", "R&B", "Soul", "Electronic", "Blues"] +BOOK_GENRES = ["History", "Biography", "Politics", "Science", "Literature", "Philosophy", "Travel", "Poetry", "Law", "Art"] +DVD_GENRES = ["Drama", "Comedy", "Thriller", "Action", "Documentary", "Adventure", "Sci-Fi", "Mystery", "Romance", "Animation"] + +LYRICS_LINES = [ + "Streetlights glow while the city hums below", + "Every promise finds a place to rest", + "A borrowed summer in a paper sky", + "Hold the moment like a photograph", + "Morning breaks and everything feels new", + "We were echoes learning how to sing", + "The night was wide and full of quiet fire", + "Turn the page and let the chorus rise", +] + +SAFE_SONG_TITLES = [ + "Northern Lights", "Quiet Avenue", "Summer Lantern", "Paper Planes", "Horizon Bloom", "Silver Harbor", + "Velvet Morning", "Clear Skies", "Open Window", "Golden Mile", "Moonlit Station", "Riverline", +] + +SAFE_MUSIC_ARTISTS = [ + "Avery Lane", "Mila Hart", "Noah Finch", "Iris Monroe", "Leo Bennett", "Aria Stone", + "Eden Park", "Rowan Hale", "Mason Vale", "Sofia Clark", "Elliot Brooks", "Nina West", +] + +PROFANITY_PATTERNS = [ + re.compile(r"\bf+u+c*k+\b", flags=re.IGNORECASE), + re.compile(r"\bs+h+i+t+\b", flags=re.IGNORECASE), + re.compile(r"\bb+i+t+c+h+\b", flags=re.IGNORECASE), + re.compile(r"\ba+s+s+h+o+l+e+\b", flags=re.IGNORECASE), + 
# Text forms that count as "no value". "<na>" and "nan" are what str() yields
# for pd.NA and float NaN once lowered; "empty" is the placeholder word the
# original check already accepted.
_MISSING_TEXT_MARKERS = frozenset({"", "empty", "<na>", "nan"})


def is_missing(value: object) -> bool:
    """Return True when *value* should be treated as an absent cell.

    A value is missing when ``pd.isna`` reports it as such, or when its
    stripped, lower-cased text is empty, the word ``empty``, or a
    stringified missing marker (``<NA>`` / ``nan``).

    The stringified markers matter because other helpers in this file call
    ``str(value).strip()`` on a cell *before* handing the result to this
    check; a ``pd.NA`` cell then shows up here as the text ``"<NA>"`` and
    must still be recognised as missing so a replacement gets generated.

    Args:
        value: A scalar cell value (string, number, ``None``, ``pd.NA``...).

    Returns:
        ``True`` if the cell carries no usable content, else ``False``.
    """
    if pd.isna(value):
        return True
    return str(value).strip().lower() in _MISSING_TEXT_MARKERS
def strip_author_years(value: object) -> str:
    """Remove birth/death year metadata from an author string.

    Handles forms such as ``", 1743-1826"``, ``"(1901-1987)"``, ``"b. 1940"``,
    ``"d. 2005"``, ``"5 BCE-65"`` and open-ended spans like ``"1947-"``, then
    tidies the leftover punctuation.

    Args:
        value: Raw author cell; may be a string or a missing scalar
            (``None``, ``pd.NA``, NaN).

    Returns:
        The cleaned author text, or ``""`` when the input is missing or
        nothing but year metadata/punctuation remains.
    """
    # A missing scalar would otherwise be stringified ("<NA>", "nan") and
    # returned as if it were a real author name.
    if pd.isna(value):
        return ""

    text = str(value).strip()
    if not text:
        return ""

    # Parenthesised years: "(1901-1987)", "(b. 1940)", "(1947-)".
    text = re.sub(r"\(\s*(?:b\.?|d\.?)?\s*\d{1,4}\??\s*(?:bce|bc|ce|ad)?\s*(?:[-/]|to)?\s*\d{0,4}\??\s*(?:bce|bc|ce|ad)?\s*\)", "", text, flags=re.IGNORECASE)
    # Single born/died markers: "b. 1940", "died 2005".
    text = re.sub(r"\b(?:b\.?|d\.?|born|died)\s*\d{1,4}\??\s*(?:bce|bc|ce|ad)?\b", "", text, flags=re.IGNORECASE)
    # Closed ranges: "1743-1826", "5 BCE-65".
    text = re.sub(r"\b\d{1,4}\??\s*(?:bce|bc|ce|ad)?\s*[-/]\s*\d{1,4}\??\s*(?:bce|bc|ce|ad)?\b", "", text, flags=re.IGNORECASE)
    # Open-ended ranges: "1947-" directly before a separator or the end.
    # The lookahead keeps hyphenated words such as "2000-style" intact.
    text = re.sub(r"\b\d{3,4}\??\s*-(?=\s*(?:[,;)]|$))", "", text)
    # A lone year carrying an era suffix: "65 CE".
    text = re.sub(r"\b\d{3,4}\??\s*(?:bce|bc|ce|ad)\b", "", text, flags=re.IGNORECASE)
    # Abbreviated ranges left after a comma: ", 1743-26".
    text = re.sub(r",\s*\d{3,4}\s*[-/]\s*\d{2,4}\b", "", text, flags=re.IGNORECASE)

    # Normalise whitespace and separators left behind by the removals.
    text = re.sub(r"\s{2,}", " ", text)
    text = re.sub(r"\s*;\s*", "; ", text)
    text = re.sub(r"\s*,\s*", ", ", text)
    # Removing the years of a non-final author leaves "Name, ; Next";
    # drop the dangling comma in front of the semicolon.
    text = re.sub(r",\s*;\s*", "; ", text)
    text = re.sub(r"(?:,\s*){2,}", ", ", text)
    text = re.sub(r"(?:;\s*){2,}", "; ", text)
    text = re.sub(r"\s+,", ",", text)
    return text.strip(" ,;")
def sanitize_periodical_titles(frame: pd.DataFrame) -> pd.DataFrame:
    """Swap out periodical titles that match a replacement pattern.

    Each non-empty ``Title`` is tested against the patterns in
    ``PERIODICAL_TITLE_REPLACEMENTS``; the first match wins and the title
    becomes ``"<replacement base> <variant>"``, where the variant word is
    drawn from a per-row RNG built by ``stable_rng``.

    Args:
        frame: Periodical rows. Returned as-is (same object) when it has no
            ``Title`` column.

    Returns:
        A copy of *frame* with matching titles replaced.
    """
    if "Title" not in frame.columns:
        return frame

    cleaned = frame.copy()
    id_col = resolve_id_column(cleaned)
    variant_words = ["Digest", "Review", "Journal", "Chronicle"]

    for label in cleaned.index:
        title_text = str(cleaned.at[label, "Title"]).strip()
        if not title_text:
            continue

        # First replacement whose pattern hits the title, if any.
        base = next(
            (
                substitute
                for matcher, substitute in PERIODICAL_TITLE_REPLACEMENTS
                if matcher.search(title_text)
            ),
            None,
        )
        if base is None:
            continue

        position = int(label)
        token = normalize_id_token(cleaned.at[label, id_col], position + 1)
        rng = stable_rng("Periodical", "replacement-title", position, token)
        suffix = rng.choice(variant_words)
        cleaned.at[label, "Title"] = f"{base} {suffix}"

    return cleaned
def generated_value(class_name: str, column: str, row_index: int, id_token: str) -> str:
    """Build a deterministic placeholder for one empty cell.

    The column name (case-insensitive) selects the kind of value; random
    picks come from ``stable_rng(class_name, column, row_index, id_token)``.

    Args:
        class_name: Export class the row belongs to ("Book", "DVD", ...).
        column: Column header of the cell being filled.
        row_index: Row position, used for seeding and for the index-derived
            ISBN fallback, ISSN and date values.
        id_token: Normalised id of the row.

    Returns:
        The generated text; ``"<column> <id_token>"`` for unrecognised columns.
    """
    key = column.lower()
    picker = stable_rng(class_name, column, row_index, id_token)

    if key in ("id", "id#"):
        return id_token

    if key == "title":
        if class_name == "DVD":
            return movie_title(picker)
        if class_name == "Book":
            return book_title(picker)
        # Music and every other class share the adjective+noun stem;
        # only non-Music titles get the " Review" suffix.
        adjective = picker.choice(MOVIE_ADJECTIVES)
        noun = picker.choice(MOVIE_NOUNS)
        stem = f"{adjective} {noun}"
        return stem if class_name == "Music" else f"{stem} Review"

    if key == "location":
        return picker.choice(LOCATIONS)

    if key in ("author", "artist", "director"):
        return pick_full_name(picker)

    if key == "publisher":
        return publisher_name(picker)

    if key == "isbn":
        # Seed from the last six id characters when numeric, else the row number.
        tail = id_token[-6:]
        seed = int(tail) if tail.isdigit() else row_index + 1
        return f"978{seed:010d}"[:13]

    if key == "pages":
        return str(picker.randint(120, 920))

    if key == "genre":
        genre_pools = {"Book": BOOK_GENRES, "DVD": DVD_GENRES}
        return picker.choice(genre_pools.get(class_name, MUSIC_GENRES))

    if key == "rating":
        return f"{picker.uniform(2.8, 4.9):.1f}"

    if key == "issn":
        head = 1000 + (row_index % 9000)
        tail_part = 1000 + ((row_index * 7) % 9000)
        return f"{head:04d}-{tail_part:04d}"

    if key == "volume":
        return str(picker.randint(1, 48))

    if key in ("issue #", "issue number"):
        return str(picker.randint(1, 24))

    if key in ("publication date", "date"):
        start = date(2020, 1, 1)
        return (start + timedelta(days=row_index % 2000)).isoformat()

    if key == "lyrics":
        how_many = picker.randint(2, 4)
        verses = [picker.choice(LYRICS_LINES) for _ in range(how_many)]
        return "; ".join(verses)

    if key == "length":
        return str(picker.randint(120, 420))

    return f"{column} {id_token}"
path in missing_sources) + raise FileNotFoundError(f"Missing source export files: {names}") + + TARGET_DIR.mkdir(parents=True, exist_ok=True) + + for class_name, source_path in SOURCE_FILES.items(): + frame = pd.read_csv(source_path, dtype="string") + output = fill_missing_values(class_name, frame) + if class_name == "Periodical": + output = sanitize_periodical_titles(output) + output = enforce_periodical_title_uniqueness(output) + if class_name == "Music": + output = sanitize_music_content(output) + output.to_csv(TARGET_DIR / source_path.name, index=False) + + if ZIP_BASE.with_suffix(".zip").exists(): + ZIP_BASE.with_suffix(".zip").unlink() + + shutil.make_archive(str(ZIP_BASE), "zip", root_dir=TARGET_DIR) + print(f"Created folder: {TARGET_DIR}") + print(f"Created zip: {ZIP_BASE.with_suffix('.zip')}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/gitignore.ipynb b/gitignore.ipynb new file mode 100644 index 0000000..4495aea --- /dev/null +++ b/gitignore.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "84264bb7-8e06-4893-805f-8a428f289df7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/inventory_report.txt b/inventory_report.txt new file mode 100644 index 0000000..cb29a7f --- /dev/null +++ b/inventory_report.txt @@ -0,0 +1,18 @@ + +Data inventory +file,type,size_bytes +/Users/nicky/Projects/CentralLibraryData/credits.csv,.csv,189917659 +/Users/nicky/Projects/CentralLibraryData/keywords.csv,.csv,6231943 
+/Users/nicky/Projects/CentralLibraryData/links.csv,.csv,989107 +/Users/nicky/Projects/CentralLibraryData/links_small.csv,.csv,183372 +/Users/nicky/Projects/CentralLibraryData/metadata.json,.json,548 +/Users/nicky/Projects/CentralLibraryData/periodical-issues-schema.json,.json,985 +/Users/nicky/Projects/CentralLibraryData/periodical-issues.csv,.csv,9221860 +/Users/nicky/Projects/CentralLibraryData/periodical-titles-schema.json,.json,1837 +/Users/nicky/Projects/CentralLibraryData/periodical-titles.csv,.csv,295509 +/Users/nicky/Projects/CentralLibraryData/pg_catalog.csv,.csv,20608513 +/Users/nicky/Projects/CentralLibraryData/ratings.csv,.csv,709550327 +/Users/nicky/Projects/CentralLibraryData/ratings_small.csv,.csv,2438266 +/Users/nicky/Projects/CentralLibraryData/ro-crate-metadata.json,.json,16664 +/Users/nicky/Projects/CentralLibraryData/tcc_ceds_music.csv,.csv,27655251 +/Users/nicky/Projects/CentralLibraryData/titles-issues-added.ndjson,.ndjson,4030430 diff --git a/java_team_package_small.zip b/java_team_package_small.zip new file mode 100644 index 0000000..9cc3c14 Binary files /dev/null and b/java_team_package_small.zip differ diff --git a/java_team_package_small/README.txt b/java_team_package_small/README.txt new file mode 100644 index 0000000..af0f6f4 --- /dev/null +++ b/java_team_package_small/README.txt @@ -0,0 +1,29 @@ +Java Team Sample Package (Cleaned) + +This package contains compact sample datasets generated from cleaned_data/CentralLibraryData. 
package org.example;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;

/**
 * Contract test for the golden periodical sample: one title row and one
 * issue row, read from the CSV files one directory above the working
 * directory, must satisfy the business rules asserted below.
 */
class GoldenSampleContractTest {

    @Test
    void goldenSampleSatisfiesBusinessRules() throws IOException {
        Path root = Path.of("..");
        Map<String, String> title = readSingleRow(root.resolve("valid-periodical-titles-sample.csv"));
        Map<String, String> issue = readSingleRow(root.resolve("valid-periodical-issues-sample.csv"));

        assertNotNull(title);
        assertNotNull(issue);

        String titleId = title.get("id");
        assertEquals(titleId, issue.get("title_id"), "issue.title_id must match title.id");

        LocalDate startDate = LocalDate.parse(title.get("start_date"));
        LocalDate endDate = LocalDate.parse(title.get("end_date"));
        LocalDate issueDate = LocalDate.parse(issue.get("date"));

        assertFalse(startDate.isAfter(endDate), "title start_date must be <= end_date");
        assertTrue((!issueDate.isBefore(startDate)) && (!issueDate.isAfter(endDate)),
                "issue date must be within title date range");

        int startYear = Integer.parseInt(title.get("start_year"));
        int endYear = Integer.parseInt(title.get("end_year"));
        assertTrue(startYear <= endYear, "title start_year must be <= end_year");

        int issueCount = Integer.parseInt(title.get("issue_count"));
        assertEquals(1, issueCount, "title issue_count must equal linked issue row count in sample");

        int pages = Integer.parseInt(issue.get("pages"));
        assertTrue(pages > 0, "issue pages must be positive");
    }

    /**
     * Reads the header line and the first data line of {@code file} and
     * returns them as a column-name to value map.
     *
     * @throws IllegalStateException if the file has fewer than two lines or
     *         the header and data line have different field counts
     */
    private static Map<String, String> readSingleRow(Path file) throws IOException {
        List<String> lines = Files.readAllLines(file, StandardCharsets.UTF_8);
        if (lines.size() < 2) {
            throw new IllegalStateException("Expected header + 1 data row in " + file);
        }

        List<String> header = splitCsvLine(lines.get(0));
        List<String> row = splitCsvLine(lines.get(1));
        if (header.size() != row.size()) {
            throw new IllegalStateException("Header/data column mismatch in " + file);
        }

        Map<String, String> record = new HashMap<>();
        for (int i = 0; i < header.size(); i++) {
            record.put(header.get(i), row.get(i));
        }
        return record;
    }

    /**
     * Splits one CSV line into fields, honouring double-quoted fields so a
     * comma inside quotes does not start a new column; a doubled quote
     * inside a quoted field yields a literal quote character.
     */
    private static List<String> splitCsvLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;

        for (int i = 0; i < line.length(); i++) {
            char ch = line.charAt(i);
            if (inQuotes) {
                if (ch != '"') {
                    current.append(ch);
                } else if (i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    current.append('"');
                    i++; // skip the second quote of the escaped pair
                } else {
                    inQuotes = false;
                }
            } else if (ch == '"') {
                inQuotes = true;
            } else if (ch == ',') {
                fields.add(current.toString());
                current.setLength(0);
            } else {
                current.append(ch);
            }
        }
        fields.add(current.toString()); // trailing field (may be empty)
        return fields;
    }
}
package org.example;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;

/**
 * Contract test for the golden periodical sample: one title row and one
 * issue row, read from the CSV files one directory above the working
 * directory, must satisfy the business rules asserted below.
 */
class GoldenSampleContractTest {

    @Test
    void goldenSampleSatisfiesBusinessRules() throws IOException {
        Path root = Path.of("..");
        Map<String, String> title = readSingleRow(root.resolve("valid-periodical-titles-sample.csv"));
        Map<String, String> issue = readSingleRow(root.resolve("valid-periodical-issues-sample.csv"));

        assertNotNull(title);
        assertNotNull(issue);

        String titleId = title.get("id");
        assertEquals(titleId, issue.get("title_id"), "issue.title_id must match title.id");

        LocalDate startDate = LocalDate.parse(title.get("start_date"));
        LocalDate endDate = LocalDate.parse(title.get("end_date"));
        LocalDate issueDate = LocalDate.parse(issue.get("date"));

        assertFalse(startDate.isAfter(endDate), "title start_date must be <= end_date");
        assertTrue((!issueDate.isBefore(startDate)) && (!issueDate.isAfter(endDate)),
                "issue date must be within title date range");

        int startYear = Integer.parseInt(title.get("start_year"));
        int endYear = Integer.parseInt(title.get("end_year"));
        assertTrue(startYear <= endYear, "title start_year must be <= end_year");

        int issueCount = Integer.parseInt(title.get("issue_count"));
        assertEquals(1, issueCount, "title issue_count must equal linked issue row count in sample");

        int pages = Integer.parseInt(issue.get("pages"));
        assertTrue(pages > 0, "issue pages must be positive");
    }

    /**
     * Reads the header line and the first data line of {@code file} and
     * returns them as a column-name to value map.
     *
     * @throws IllegalStateException if the file has fewer than two lines or
     *         the header and data line have different field counts
     */
    private static Map<String, String> readSingleRow(Path file) throws IOException {
        List<String> lines = Files.readAllLines(file, StandardCharsets.UTF_8);
        if (lines.size() < 2) {
            throw new IllegalStateException("Expected header + 1 data row in " + file);
        }

        List<String> header = splitCsvLine(lines.get(0));
        List<String> row = splitCsvLine(lines.get(1));
        if (header.size() != row.size()) {
            throw new IllegalStateException("Header/data column mismatch in " + file);
        }

        Map<String, String> record = new HashMap<>();
        for (int i = 0; i < header.size(); i++) {
            record.put(header.get(i), row.get(i));
        }
        return record;
    }

    /**
     * Splits one CSV line into fields, honouring double-quoted fields so a
     * comma inside quotes does not start a new column; a doubled quote
     * inside a quoted field yields a literal quote character.
     */
    private static List<String> splitCsvLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;

        for (int i = 0; i < line.length(); i++) {
            char ch = line.charAt(i);
            if (inQuotes) {
                if (ch != '"') {
                    current.append(ch);
                } else if (i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    current.append('"');
                    i++; // skip the second quote of the escaped pair
                } else {
                    inQuotes = false;
                }
            } else if (ch == '"') {
                inQuotes = true;
            } else if (ch == ',') {
                fields.add(current.toString());
                current.setLength(0);
            } else {
                current.append(ch);
            }
        }
        fields.add(current.toString()); // trailing field (may be empty)
        return fields;
    }
}
'board game'}, {'id': 10941, 'name': 'disappearance'}, {'id': 15101, 'name': "based on children's book"}, {'id': 33467, 'name': 'new home'}, {'id': 158086, 'name': 'recluse'}, {'id': 158091, 'name': 'giant insect'}] +15602 [{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}] +31357 [{'id': 818, 'name': 'based on novel'}, {'id': 10131, 'name': 'interracial relationship'}, {'id': 14768, 'name': 'single mother'}, {'id': 15160, 'name': 'divorce'}, {'id': 33455, 'name': 'chick flick'}] +11862 [{'id': 1009, 'name': 'baby'}, {'id': 1599, 'name': 'midlife crisis'}, {'id': 2246, 'name': 'confidence'}, {'id': 4995, 'name': 'aging'}, {'id': 5600, 'name': 'daughter'}, {'id': 10707, 'name': 'mother daughter relationship'}, {'id': 13149, 'name': 'pregnancy'}, {'id': 33358, 'name': 'contraception'}, {'id': 170521, 'name': 'gynecologist'}] + +null report (sorted): + null_count null_pct +id 0 0.0 +keywords 0 0.0 + +exact duplicate rows: 987 +duplicate index numbers (showing up to 200): [1465, 9165, 9327, 12066, 13375, 15074, 15765, 16764, 20842, 20898, 21115, 21164, 21853, 22150, 23043, 23533, 24843, 26624, 28859, 29373, 29999, 33741, 33824, 35795, 36342, 36343, 36344, 36345, 36346, 36347, 36348, 36349, 36350, 36351, 36352, 36353, 36354, 36355, 36356, 36357, 36358, 36359, 36360, 36361, 36362, 36363, 36364, 36365, 36366, 36367, 36368, 36369, 36370, 36371, 36372, 36373, 36374, 36375, 36376, 36377, 36378, 36379, 36380, 36381, 36382, 36383, 36384, 36385, 36386, 36387, 36388, 36389, 36390, 36391, 36392, 36393, 36394, 36395, 36396, 36397, 36398, 36399, 36400, 36401, 36402, 36403, 36404, 36405, 36406, 36407, 36408, 36409, 36410, 36411, 36412, 36413, 36414, 36415, 36416, 36417, 36418, 36419, 36420, 36421, 36422, 36423, 36424, 36425, 36426, 36427, 36428, 36429, 36430, 36431, 36432, 36433, 36434, 36435, 36436, 36437, 36438, 36439, 36440, 36441, 36442, 36443, 36444, 36445, 36446, 36447, 
36448, 36449, 36450, 36451, 36452, 36453, 36454, 36455, 36456, 36457, 36458, 36459, 36460, 36461, 36462, 36463, 36464, 36465, 36466, 36467, 36468, 36469, 36470, 36471, 36472, 36473, 36474, 36475, 36476, 36477, 36478, 36479, 36480, 36481, 36482, 36483, 36484, 36485, 36486, 36487, 36488, 36489, 36490, 36491, 36492, 36493, 36494, 36495, 36496, 36497, 36498, 36499, 36500, 36501, 36502, 36503, 36504, 36505, 36506, 36507, 36508, 36509, 36510, 36511, 36512, 36513, 36514, 36515, 36516, 36517] + id keywords +1465 105045 [{'id': 7059, 'name': 'anti-communism'}, {'id': 33501, 'name': 'political'}, {'id': 187056, 'name': 'woman director'}] +9165 5511 [{'id': 90, 'name': 'paris'}, {'id': 1010, 'name': 'bar'}, {'id': 1416, 'name': 'jazz'}, {'id': 1972, 'name': 'hearing'}, {'id': 2101, 'name': 'garage'}, {'id': 2708, 'name': 'hitman'}, {'id': 5823, 'name': 'jazz club'}, {'id': 6149, 'name': 'police'}, {'id': 7453, 'name': 'canary'}, {'id': 10202, 'name': 'treason'}, {'id': 15017, 'name': 'danger'}, {'id': 41158, 'name': 'stakeout'}, {'id': 155845, 'name': 'french noir'}, {'id': 195402, 'name': 'film noir'}, {'id': 198778, 'name': 'last job'}, {'id': 206041, 'name': 'professional assassin'}, {'id': 212412, 'name': 'very little dialogue'}, {'id': 226754, 'name': 'silent protagonist'}] +9327 23305 [] +12066 14788 [{'id': 9826, 'name': 'murder'}, {'id': 10183, 'name': 'independent film'}, {'id': 196321, 'name': 'doll factory'}] +13375 141971 [] +15074 22649 [{'id': 131, 'name': 'italy'}, {'id': 428, 'name': 'nurse'}, {'id': 3452, 'name': 'love letter'}, {'id': 3776, 'name': 'officer'}, {'id': 10093, 'name': 'priest'}, {'id': 10685, 'name': 'escape'}, {'id': 11612, 'name': 'hospital'}, {'id': 12392, 'name': 'best friend'}, {'id': 156764, 'name': 'pre-code'}, {'id': 158547, 'name': 'air raid'}, {'id': 185034, 'name': 'ambulance driver'}, {'id': 185037, 'name': 'battle of the isonzo'}, {'id': 233360, 'name': 'air attack'}] +15765 13209 [] +16764 141971 [] +20842 77221 [] +20898 109962 
[{'id': 1253, 'name': 'roommate'}, {'id': 6054, 'name': 'friendship'}] +21115 84198 [{'id': 187056, 'name': 'woman director'}] +21164 119916 [] +21853 152795 [{'id': 10336, 'name': 'animation'}] +22150 18440 [] +23043 25541 [{'id': 237, 'name': 'gay'}, {'id': 255, 'name': 'male nudity'}, {'id': 10180, 'name': 'homosexuality'}, {'id': 160910, 'name': 'xenophobia'}, {'id': 191726, 'name': 'nazism'}, {'id': 199974, 'name': 'neo-nazis'}] +23533 110428 [{'id': 254, 'name': 'france'}, {'id': 745, 'name': 'nun'}, {'id': 5565, 'name': 'biography'}, {'id': 12438, 'name': 'sculpture'}, {'id': 160174, 'name': 'mental asylum'}, {'id': 161167, 'name': 'persecution'}, {'id': 185722, 'name': 'based on true events'}, {'id': 188356, 'name': 'separation from family'}, {'id': 223232, 'name': 'estranged brother'}, {'id': 235133, 'name': 'religious institution'}, {'id': 235134, 'name': 'estranged mother'}] +24843 11115 [{'id': 271, 'name': 'competition'}, {'id': 383, 'name': 'poker'}, {'id': 550, 'name': 'callgirl'}, {'id': 1310, 'name': 'mentor'}, {'id': 2062, 'name': 'restart'}, {'id': 2246, 'name': 'confidence'}, {'id': 3090, 'name': 'loser'}, {'id': 6274, 'name': 'learning and teaching'}, {'id': 7496, 'name': 'gain'}, {'id': 8829, 'name': 'turnier'}, {'id': 10183, 'name': 'independent film'}, {'id': 10594, 'name': 'money'}, {'id': 33625, 'name': 'illegal prostitution'}] +26624 69234 [{'id': 2199, 'name': 'opera'}, {'id': 2546, 'name': 'mask'}, {'id': 3282, 'name': 'gondola'}, {'id': 3284, 'name': 'phantom'}, {'id': 9714, 'name': 'remake'}, {'id': 11162, 'name': 'miniseries'}, {'id': 231318, 'name': 'paris france'}, {'id': 233843, 'name': 'phantom of the opera'}] +28859 168538 [] +29373 42495 [] +29999 132641 [{'id': 818, 'name': 'based on novel'}, {'id': 4183, 'name': 'wife'}, {'id': 6038, 'name': 'marriage'}, {'id': 156455, 'name': 'unhappiness'}] +33741 15028 [{'id': 3667, 'name': 'time'}, {'id': 3800, 'name': 'airplane'}, {'id': 14751, 'name': 'youth'}, {'id': 158389, 'name': 
'wristwatch'}] +33824 4912 [{'id': 4688, 'name': 'microfilm'}, {'id': 5565, 'name': 'biography'}, {'id': 18107, 'name': 'silencer'}, {'id': 34038, 'name': 'intrigue'}] +35795 159849 [] +36342 140300 [{'id': 478, 'name': 'china'}, {'id': 779, 'name': 'martial arts'}, {'id': 780, 'name': 'kung fu'}, {'id': 5331, 'name': 'village'}, {'id': 8531, 'name': 'panda'}, {'id': 9663, 'name': 'sequel'}, {'id': 11477, 'name': 'talking animal'}, {'id': 11500, 'name': 'anthropomorphism'}, {'id': 12554, 'name': 'dragon'}, {'id': 15036, 'name': 'ancient china'}, {'id': 184656, 'name': 'wuxia'}, {'id': 187056, 'name': 'woman director'}] +36343 16308 [] +36344 16157 [{'id': 2902, 'name': 'space battle'}, {'id': 10046, 'name': 'mecha'}, {'id': 161176, 'name': 'space opera'}, {'id': 197278, 'name': 'gundam'}] +36345 154738 [] +36346 45528 [] +36347 21442 [] +36348 319089 [] +36349 62691 [] +36350 79280 [] +36351 281524 [{'id': 8132, 'name': 'hooligan'}, {'id': 190197, 'name': 'hooliganism'}] +36352 72443 [{'id': 10183, 'name': 'independent film'}] +36353 58550 [{'id': 1453, 'name': 'amnesia'}, {'id': 6270, 'name': 'high school'}, {'id': 11469, 'name': 'memory loss'}, {'id': 13130, 'name': 'teenager'}, {'id': 223438, 'name': 'based on young adult novel'}] +36354 95164 [] +36355 160297 [] +36356 58551 [] +36357 72592 [] +36358 64882 [{'id': 158540, 'name': 'south korea'}] +36359 61888 [] +36360 142499 [] +36361 63465 [] +36362 85377 [] +36363 41835 [] +36364 100923 [{'id': 6054, 'name': 'friendship'}, {'id': 8029, 'name': 'drama'}, {'id': 10683, 'name': 'coming of age'}] +36365 138853 [] +36366 38000 [] +36367 45079 [] +36368 72505 [] +36369 286007 [] +36370 56232 [] +36371 67560 [{'id': 190370, 'name': 'erotic movie'}, {'id': 213493, 'name': 'sexual massage'}] +36372 108646 [] +36373 64228 [] +36374 41198 [] +36375 80618 [] +36376 114661 [] +36377 32695 [] +36378 313676 [] +36379 38011 [] +36380 116488 [] +36381 314283 [{'id': 207317, 'name': 'christmas'}] +36382 26381 [{'id': 65, 
'name': 'holiday'}] +36383 206155 [{'id': 818, 'name': 'based on novel'}] +36384 49704 [{'id': 2032, 'name': 'pop singer'}] +36385 47331 [{'id': 9826, 'name': 'murder'}, {'id': 10714, 'name': 'serial killer'}, {'id': 12369, 'name': 'tokyo japan'}] +36386 159230 [] +36387 138665 [{'id': 9937, 'name': 'suspense'}, {'id': 187056, 'name': 'woman director'}] +36388 171587 [{'id': 158540, 'name': 'south korea'}] +36389 189151 [{'id': 13141, 'name': 'based on manga'}] +36390 169068 [] +36391 92499 [{'id': 198308, 'name': '오싹한 연애'}] +36392 232420 [{'id': 128, 'name': 'love triangle'}, {'id': 237, 'name': 'gay'}, {'id': 494, 'name': 'father son relationship'}, {'id': 818, 'name': 'based on novel'}, {'id': 848, 'name': 'double life'}, {'id': 907, 'name': 'japanese'}, {'id': 1157, 'name': 'wife husband relationship'}, {'id': 1328, 'name': 'secret'}, {'id': 1946, 'name': 'restaurant'}, {'id': 2692, 'name': 'arranged marriage'}, {'id': 3686, 'name': 'marriage of convenience'}, {'id': 4320, 'name': 'lover'}, {'id': 6038, 'name': 'marriage'}, {'id': 7464, 'name': 'alcoholism'}, {'id': 9673, 'name': 'love'}, {'id': 10048, 'name': 'unrequited love'}, {'id': 10199, 'name': 'gay interest'}, {'id': 10235, 'name': 'family relationships'}, {'id': 10707, 'name': 'mother daughter relationship'}, {'id': 11524, 'name': 'in the closet'}, {'id': 13005, 'name': 'doctor'}, {'id': 15300, 'name': 'father daughter relationship'}, {'id': 33457, 'name': 'alcoholic'}, {'id': 33458, 'name': 'gay man'}, {'id': 156104, 'name': 'stranger'}, {'id': 157499, 'name': 'mother son relationship'}, {'id': 184517, 'name': 'father-in-law son-in-law relationship'}] +36393 212640 [{'id': 187056, 'name': 'woman director'}] +36394 311093 [{'id': 65, 'name': 'holiday'}, {'id': 207317, 'name': 'christmas'}] +36395 13739 [] +36396 260583 [] +36397 343702 [] +36398 31127 [] +36399 40800 [{'id': 3184, 'name': 'alligator'}] +36400 62768 [{'id': 650, 'name': 'gymnastics'}] +36401 70702 [{'id': 9717, 'name': 'based on 
comic'}] +36402 372981 [{'id': 156183, 'name': 'west'}] +36403 320587 [{'id': 5565, 'name': 'biography'}, {'id': 187056, 'name': 'woman director'}] +36404 106222 [] +36405 200998 [] +36406 73526 [] +36407 26537 [{'id': 65, 'name': 'holiday'}] +36408 242551 [] +36409 128070 [] +36410 323929 [{'id': 4543, 'name': 'thanksgiving'}, {'id': 5733, 'name': 'turkey'}, {'id': 6782, 'name': 'addiction'}, {'id': 13000, 'name': 'based on short film'}, {'id': 18035, 'name': 'family'}, {'id': 41329, 'name': 'mental illness'}, {'id': 169635, 'name': 'psychological drama'}, {'id': 196911, 'name': 'estranged son'}, {'id': 210501, 'name': 'tragicomedy'}] +36411 45166 [{'id': 33720, 'name': 'guilt'}, {'id': 160224, 'name': 'post world war ii'}] +36412 60481 [{'id': 18300, 'name': 'english countryside'}, {'id': 207845, 'name': 'bumbling cops'}] +36413 229005 [{'id': 924, 'name': 'italian'}, {'id': 10235, 'name': 'family relationships'}, {'id': 10909, 'name': 'lawyer'}, {'id': 156764, 'name': 'pre-code'}, {'id': 169814, 'name': 'aviator'}] +36414 82036 [{'id': 197928, 'name': 'laurel and hardy'}] +36415 43788 [{'id': 1956, 'name': 'world war ii'}, {'id': 2652, 'name': 'nazis'}, {'id': 14618, 'name': 'british spy'}] +36416 43823 [] +36417 102051 [{'id': 10196, 'name': 'role playing'}, {'id': 157909, 'name': 'larp'}] +36418 364833 [] +36419 50542 [] +36420 24426 [{'id': 4379, 'name': 'time travel'}, {'id': 10506, 'name': 'prehistoric'}] +36421 40246 [{'id': 65, 'name': 'holiday'}, {'id': 4344, 'name': 'musical'}] +36422 141976 [] +36423 299578 [{'id': 9840, 'name': 'romance'}, {'id': 10267, 'name': 'comedy'}, {'id': 187056, 'name': 'woman director'}, {'id': 230785, 'name': 'noël'}] +36424 239180 [{'id': 9799, 'name': 'romantic comedy'}, {'id': 207317, 'name': 'christmas'}] +36425 352209 [] +36426 350849 [{'id': 128, 'name': 'love triangle'}, {'id': 10181, 'name': 'based on play or musical'}, {'id': 10644, 'name': 'melodrama'}, {'id': 14720, 'name': 'love affair'}, {'id': 156195, 'name': 
'family feud'}, {'id': 187056, 'name': 'woman director'}, {'id': 214594, 'name': 'play adaptation'}] +36427 141476 [{'id': 207317, 'name': 'christmas'}] +36428 373976 [{'id': 187056, 'name': 'woman director'}] +36429 344041 [{'id': 1925, 'name': 'camera'}, {'id': 2546, 'name': 'mask'}, {'id': 6712, 'name': 'death of a child'}, {'id': 9826, 'name': 'murder'}, {'id': 9897, 'name': 'rifle'}, {'id': 13127, 'name': 'farmhouse'}, {'id': 159138, 'name': 'death of son'}, {'id': 166512, 'name': 'graveyard'}, {'id': 210746, 'name': 'hide'}, {'id': 221685, 'name': 'eyewitness'}] +36430 220488 [{'id': 12354, 'name': 'hong kong'}, {'id': 184188, 'name': 'macau'}, {'id': 187056, 'name': 'woman director'}] +36431 291348 [{'id': 11001, 'name': 'religion'}] +36432 335205 [] +36433 76354 [] +36434 37715 [{'id': 10183, 'name': 'independent film'}] +36435 70712 [] +36436 161482 [{'id': 163096, 'name': 'battle of the sexes'}, {'id': 187056, 'name': 'woman director'}, {'id': 219802, 'name': 'feel bad comedy'}, {'id': 219872, 'name': 'idle rich'}] +36437 362045 [{'id': 232174, 'name': 'любовь'}, {'id': 236528, 'name': 'битва'}, {'id': 236529, 'name': 'воин'}] +36438 278706 [{'id': 321, 'name': 'terror'}, {'id': 3929, 'name': 'hope'}, {'id': 4458, 'name': 'post-apocalyptic'}, {'id': 9937, 'name': 'suspense'}, {'id': 10292, 'name': 'gore'}, {'id': 12377, 'name': 'zombie'}, {'id': 186565, 'name': 'zombie apocalypse'}] +36439 96985 [{'id': 6951, 'name': 'biker'}, {'id': 14735, 'name': 'motorcycle'}] +36440 256687 [] +36441 350845 [] +36442 191731 [] +36443 43020 [{'id': 2036, 'name': 'greek mythology'}, {'id': 11121, 'name': 'plot'}] +36444 127642 [{'id': 6895, 'name': 'philippines'}, {'id': 215397, 'name': 'pinoy'}] +36445 315319 [] +36446 298722 [] +36447 370835 [{'id': 383, 'name': 'poker'}, {'id': 395, 'name': 'gambling'}] +36448 61464 [] +36449 60116 [] +36450 278122 [] +36451 299143 [{'id': 192947, 'name': 'religious film'}] +36452 113936 [] +36453 166888 [{'id': 2652, 'name': 
'nazis'}, {'id': 10329, 'name': 'wealth'}] +36454 339934 [{'id': 6363, 'name': 'racing car'}, {'id': 10037, 'name': 'le mans'}, {'id': 162262, 'name': 'movie star'}] +36455 60895 [{'id': 10103, 'name': 'children'}, {'id': 10508, 'name': 'teacher'}, {'id': 10873, 'name': 'school'}] +36456 316179 [{'id': 187056, 'name': 'woman director'}] +36457 217250 [] +36458 373977 [] +36459 254736 [] +36460 91067 [] +36461 128230 [] +36462 347201 [{'id': 210024, 'name': 'anime'}, {'id': 215762, 'name': 'naruto'}] +36463 66772 [] +36464 253258 [] +36465 326415 [{'id': 2804, 'name': 'exploitation'}] +36466 89751 [{'id': 6553, 'name': 'beatnik'}] +36467 26550 [{'id': 894, 'name': 'depression'}, {'id': 1525, 'name': 'puberty'}, {'id': 1543, 'name': 'war veteran'}, {'id': 1610, 'name': 'country estate'}, {'id': 2087, 'name': 'lolita'}, {'id': 2504, 'name': 'world war i'}, {'id': 3691, 'name': 'forbidden love'}, {'id': 10683, 'name': 'coming of age'}, {'id': 10814, 'name': 'wounded'}, {'id': 178649, 'name': 'voyeurism'}] +36468 20360 [] +36469 373357 [] +36470 342927 [{'id': 1299, 'name': 'monster'}, {'id': 2766, 'name': 'mutation'}, {'id': 3544, 'name': 'machete'}, {'id': 15097, 'name': 'shark'}, {'id': 158253, 'name': 'mutant animal'}, {'id': 220829, 'name': 'booze cruise'}] +36471 68507 [{'id': 738, 'name': 'sexuality'}, {'id': 155693, 'name': 'erotic drama'}] +36472 272426 [] +36473 357940 [{'id': 3261, 'name': 'street war'}, {'id': 13142, 'name': 'gangster'}, {'id': 33683, 'name': 'beijing'}, {'id': 179093, 'name': 'criminal underworld'}] +36474 41032 [{'id': 6152, 'name': 'supernatural'}] +36475 292523 [] +36476 28997 [] +36477 73976 [{'id': 9833, 'name': 'lesbian relationship'}, {'id': 10180, 'name': 'homosexuality'}] +36478 37817 [] +36479 82745 [] +36480 85389 [] +36481 73919 [] +36482 69846 [] +36483 55138 [{'id': 208992, 'name': '1960s'}, {'id': 215019, 'name': 'barking'}] +36484 300601 [{'id': 13027, 'name': 'wedding'}, {'id': 178926, 'name': 'postponed wedding'}, {'id': 
187056, 'name': 'woman director'}] +36485 100274 [{'id': 236, 'name': 'suicide'}, {'id': 331, 'name': 'tattoo'}, {'id': 572, 'name': 'sex'}, {'id': 2483, 'name': 'nudity'}, {'id': 6152, 'name': 'supernatural'}, {'id': 10183, 'name': 'independent film'}] +36486 372821 [] +36487 145711 [{'id': 207317, 'name': 'christmas'}] +36488 107257 [{'id': 10180, 'name': 'homosexuality'}, {'id': 34215, 'name': 'lesbian interest'}, {'id': 158718, 'name': 'lgbt'}, {'id': 187056, 'name': 'woman director'}] +36489 267319 [{'id': 236, 'name': 'suicide'}, {'id': 570, 'name': 'rape'}, {'id': 572, 'name': 'sex'}, {'id': 6593, 'name': 'stripper'}, {'id': 9748, 'name': 'revenge'}, {'id': 11291, 'name': 'girl heroine'}, {'id': 11322, 'name': 'female protagonist'}, {'id': 41172, 'name': 'schoolgirl'}] +36490 317442 [{'id': 10724, 'name': 'naruto shippuuden'}, {'id': 210024, 'name': 'anime'}, {'id': 215762, 'name': 'naruto'}] +36491 98440 [] +36492 98438 [] +36493 98439 [] +36494 261157 [] +36495 79708 [{'id': 187056, 'name': 'woman director'}] +36496 114018 [{'id': 128, 'name': 'love triangle'}, {'id': 1930, 'name': 'kidnapping'}, {'id': 9840, 'name': 'romance'}, {'id': 10322, 'name': 'native american'}, {'id': 179419, 'name': 'comanche'}, {'id': 220416, 'name': 'kiowa'}] +36497 267955 [{'id': 6186, 'name': 'games'}, {'id': 207372, 'name': 'quest'}] +36498 352200 [{'id': 441, 'name': 'assassination'}, {'id': 536, 'name': 'israel'}, {'id': 33501, 'name': 'political'}] +36499 366566 [{'id': 90, 'name': 'paris'}, {'id': 187056, 'name': 'woman director'}] +36500 337075 [{'id': 2343, 'name': 'magic'}, {'id': 2486, 'name': 'fantasy'}, {'id': 10092, 'name': 'mystery'}, {'id': 10987, 'name': 'cgi'}] +36501 309811 [] +36502 281291 [{'id': 478, 'name': 'china'}, {'id': 514, 'name': 'spain'}, {'id': 5565, 'name': 'biography'}, {'id': 6078, 'name': 'politics'}, {'id': 13159, 'name': 'chinese'}] +36503 362154 [] +36504 16632 [{'id': 9748, 'name': 'revenge'}, {'id': 167310, 'name': 'sitting on a 
toilet'}, {'id': 205603, 'name': 'korean movie'}] +36505 20969 [{'id': 5440, 'name': 'adversary'}, {'id': 9727, 'name': 'thief'}, {'id': 9840, 'name': 'romance'}, {'id': 206718, 'name': 'cons and scams'}, {'id': 220612, 'name': 'grifters'}, {'id': 220613, 'name': 'serial dating'}] +36506 16147 [{'id': 3012, 'name': 'court case'}, {'id': 3356, 'name': 'quebec'}, {'id': 5921, 'name': 'step mother'}] +36507 214129 [{'id': 6895, 'name': 'philippines'}, {'id': 9799, 'name': 'romantic comedy'}, {'id': 215397, 'name': 'pinoy'}] +36508 140054 [] +36509 129745 [] +36510 252981 [{'id': 6895, 'name': 'philippines'}, {'id': 13027, 'name': 'wedding'}, {'id': 180052, 'name': 'bride and groom'}, {'id': 187056, 'name': 'woman director'}, {'id': 192086, 'name': 'rental'}, {'id': 215397, 'name': 'pinoy'}] +36511 63064 [{'id': 187056, 'name': 'woman director'}] +36512 81560 [] +36513 41186 [{'id': 12354, 'name': 'hong kong'}] +36514 279998 [] +36515 36185 [{'id': 9840, 'name': 'romance'}] +36516 9375 [{'id': 1525, 'name': 'puberty'}, {'id': 2425, 'name': 'bodily disabled person'}, {'id': 6054, 'name': 'friendship'}, {'id': 6271, 'name': 'boarding school'}, {'id': 10791, 'name': 'teenage crush'}] +36517 36164 [] + +id duplicate rows for id: 1972 +duplicate index numbers (id, showing up to 200): [676, 838, 949, 1465, 2564, 4114, 4356, 5130, 5535, 5710, 5865, 7345, 8068, 9165, 9327, 9576, 10419, 11155, 11342, 12066, 13220, 13261, 13375, 13603, 13946, 14000, 14012, 15074, 15702, 15765, 16167, 16764, 17229, 19889, 19924, 20842, 20898, 21115, 21164, 21853, 22150, 23043, 23533, 24163, 24843, 26624, 28859, 29373, 29999, 33182, 33741, 33824, 35385, 35386, 35387, 35388, 35389, 35390, 35391, 35392, 35393, 35394, 35395, 35396, 35397, 35398, 35399, 35400, 35401, 35402, 35403, 35404, 35405, 35406, 35407, 35408, 35409, 35410, 35411, 35412, 35413, 35414, 35415, 35416, 35417, 35418, 35419, 35420, 35421, 35422, 35423, 35424, 35425, 35426, 35427, 35428, 35429, 35430, 35431, 35432, 35433, 35434, 35435, 
35436, 35437, 35438, 35439, 35440, 35441, 35442, 35443, 35444, 35445, 35446, 35447, 35448, 35449, 35450, 35451, 35452, 35453, 35454, 35455, 35456, 35457, 35458, 35459, 35460, 35461, 35462, 35463, 35464, 35465, 35466, 35467, 35468, 35469, 35470, 35471, 35472, 35473, 35474, 35475, 35476, 35477, 35478, 35479, 35480, 35481, 35482, 35483, 35484, 35485, 35486, 35487, 35488, 35489, 35490, 35491, 35492, 35493, 35494, 35495, 35496, 35497, 35498, 35499, 35500, 35501, 35502, 35503, 35504, 35505, 35506, 35507, 35508, 35509, 35510, 35511, 35512, 35513, 35514, 35515, 35516, 35517, 35518, 35519, 35520, 35521, 35522, 35523, 35524, 35525, 35526, 35527, 35528, 35529, 35530, 35531, 35532] + id +36138 1998 +37095 1998 +35865 3025 +36822 3025 +35999 3692 +36956 3692 +36631 4459 +35674 4459 +35937 4709 +36894 4709 +33824 4912 +5865 4912 +35931 5067 +36888 5067 +7345 5511 +9165 5511 +35789 5953 +36746 5953 +36516 9375 +35559 9375 +36596 10509 +35639 10509 +37113 10626 +36156 10626 +45774 10991 +4114 10991 +37106 11089 +36149 11089 +24843 11115 +14012 11115 +36572 12135 +35615 12135 +5535 12600 +45779 12600 +37148 12831 +36191 12831 +15765 13209 +11342 13209 +35889 13274 +36846 13274 +35809 13713 +36766 13713 +36395 13739 +35438 13739 +12066 14788 +10419 14788 +5130 15028 +33741 15028 +36108 15391 +37065 15391 +35655 15994 +36612 15994 +35744 16075 +36701 16075 +36506 16147 +35549 16147 +36344 16157 +35387 16157 +36343 16308 +35386 16308 +35547 16632 +36504 16632 +35890 16907 +36847 16907 +35763 16912 +36720 16912 +36262 17276 +37219 17276 +36575 17486 +35618 17486 +36234 17720 +37191 17720 +36590 17902 +35633 17902 +37143 18174 +36186 18174 +37173 18230 +36216 18230 +22150 18440 +14000 18440 +36146 18874 +37103 18874 +37225 19252 +36268 19252 +37223 19297 +36266 19297 +37217 20051 +36260 20051 +35609 20226 +36566 20226 +35511 20360 +36468 20360 +36588 20388 +35631 20388 +37154 20916 +36197 20916 +36505 20969 +35548 20969 +36563 21167 +35606 21167 +37090 21266 +36133 21266 +36347 21442 
+35390 21442 +35624 22034 +36581 22034 +37151 22316 +36194 22316 +36598 22454 +35641 22454 +15074 22649 +949 22649 +9327 23305 +8068 23305 +36812 23382 +35855 23382 +36912 24041 +35955 24041 +36705 24154 +35748 24154 +35463 24426 +36420 24426 +35632 24524 +36589 24524 +37218 24569 +36261 24569 +36334 24570 +37291 24570 +37220 24571 +36263 24571 +37234 24654 +36277 24654 +37034 25366 +36077 25366 +23043 25541 +17229 25541 +36206 25801 +37163 25801 +35604 26199 +36561 26199 +35881 26314 +36838 26314 +36678 26379 +35721 26379 +35425 26381 +36382 26381 +36407 26537 +35450 26537 +36467 26550 +35510 26550 +36019 26703 +36976 26703 +36936 27069 +35979 27069 +36030 27428 +36987 27428 +36625 27548 +35668 27548 +35722 27621 +36679 27621 +36938 27748 +35981 27748 +35707 28859 +36664 28859 +35965 28920 +36922 28920 +35519 28997 +36476 28997 +36270 29114 +37227 29114 +36040 29351 +36997 29351 +36093 29695 +37050 29695 +36726 29982 +35769 29982 +35730 30117 +36687 30117 +35580 31078 +36537 31078 +36398 31127 +35441 31127 +35780 31237 +36737 31237 +35807 31343 +36764 31343 +35888 31855 +36845 31855 +35779 32643 +36736 32643 +36377 32695 +35420 32695 +37009 35021 +36052 35021 +37129 35025 +36172 35025 +36025 35411 +36982 35411 +36615 35639 +35658 35639 +CLEANED: /Users/nicky/Projects/CentralLibraryData/keywords.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/keywords.csv (dupes=987, nulls=0) +validation passed: keywords.csv diff --git a/links.csv.report.txt b/links.csv.report.txt new file mode 100644 index 0000000..c92415e --- /dev/null +++ b/links.csv.report.txt @@ -0,0 +1,25 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/links.csv === +rows: 45843 +columns: 3 +column names and dtypes: +movieId int64 +imdbId int64 +tmdbId float64 + +first 5 rows: + movieId imdbId tmdbId + 1 114709 862.0 + 2 113497 8844.0 + 3 113228 15602.0 + 4 114885 31357.0 + 5 113041 11862.0 + +null report (sorted): + null_count null_pct +tmdbId 219 0.477717 +movieId 0 0.000000 +imdbId 
0 0.000000 + +exact duplicate rows: 0 +UNCHANGED: /Users/nicky/Projects/CentralLibraryData/links.csv diff --git a/links_small.csv.report.txt b/links_small.csv.report.txt new file mode 100644 index 0000000..c7c17d0 --- /dev/null +++ b/links_small.csv.report.txt @@ -0,0 +1,25 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/links_small.csv === +rows: 9125 +columns: 3 +column names and dtypes: +movieId int64 +imdbId int64 +tmdbId float64 + +first 5 rows: + movieId imdbId tmdbId + 1 114709 862.0 + 2 113497 8844.0 + 3 113228 15602.0 + 4 114885 31357.0 + 5 113041 11862.0 + +null report (sorted): + null_count null_pct +tmdbId 13 0.142466 +movieId 0 0.000000 +imdbId 0 0.000000 + +exact duplicate rows: 0 +UNCHANGED: /Users/nicky/Projects/CentralLibraryData/links_small.csv diff --git a/metadata.json.report.txt b/metadata.json.report.txt new file mode 100644 index 0000000..313926b --- /dev/null +++ b/metadata.json.report.txt @@ -0,0 +1,8 @@ +CLEANED: /Users/nicky/Projects/CentralLibraryData/metadata.json -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/metadata.json (dupes=0, nulls=0) + +=== REPORT: /Users/nicky/Projects/CentralLibraryData/metadata.json === +file type: .json +status: changed +output: /Users/nicky/Projects/CentralLibraryData/cleaned_data/metadata.json +duplicates removed: 0 +null rows removed: 0 diff --git a/periodical-issues.csv.report.txt b/periodical-issues.csv.report.txt new file mode 100644 index 0000000..a4a3277 --- /dev/null +++ b/periodical-issues.csv.report.txt @@ -0,0 +1,268 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/periodical-issues.csv === +rows: 37015 +columns: 8 +column names and dtypes: +id str +title_id str +title str +description str +date str +url str +pages int64 +text_download_url str + +first 5 rows: + id title_id title description date url pages text_download_url +nla.obj-8447243 nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. 
Volume 1 Number 1 1916-11-10 https://nla.gov.au/nla.obj-8447243 48 https://trove.nla.gov.au/nla.obj-8447243/download?downloadOption=ocr&firstPage=0&lastPage=47 +nla.obj-8452230 nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. Volume 1 Number 2 1916-12-20 https://nla.gov.au/nla.obj-8452230 40 https://trove.nla.gov.au/nla.obj-8452230/download?downloadOption=ocr&firstPage=0&lastPage=39 +nla.obj-8458012 nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. Volume 1 Number 3 1917-01-19 https://nla.gov.au/nla.obj-8458012 40 https://trove.nla.gov.au/nla.obj-8458012/download?downloadOption=ocr&firstPage=0&lastPage=39 +nla.obj-8465575 nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. Volume 1 Number 4 1917-02-19 https://nla.gov.au/nla.obj-8465575 40 https://trove.nla.gov.au/nla.obj-8465575/download?downloadOption=ocr&firstPage=0&lastPage=39 +nla.obj-8469716 nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. 
Volume 1 Number 5 1917-03-09 https://nla.gov.au/nla.obj-8469716 36 https://trove.nla.gov.au/nla.obj-8469716/download?downloadOption=ocr&firstPage=0&lastPage=35 + +null report (sorted): + null_count null_pct +date 431 1.164393 +description 16 0.043226 +id 0 0.000000 +title_id 0 0.000000 +title 0 0.000000 +url 0 0.000000 +pages 0 0.000000 +text_download_url 0 0.000000 + +exact duplicate rows: 0 + +id duplicate rows for title_id: 36793 +duplicate index numbers (title_id, showing up to 200): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204] + title_id +1616 nla.obj-1037567 +1611 nla.obj-1037567 +1610 nla.obj-1037567 +1609 nla.obj-1037567 +1608 nla.obj-1037567 +1607 nla.obj-1037567 +1606 nla.obj-1037567 +1605 nla.obj-1037567 +1604 nla.obj-1037567 +1603 nla.obj-1037567 +1602 nla.obj-1037567 +1601 nla.obj-1037567 +1600 nla.obj-1037567 +1599 nla.obj-1037567 +1598 nla.obj-1037567 +1597 nla.obj-1037567 +1596 nla.obj-1037567 +1595 nla.obj-1037567 +1594 nla.obj-1037567 +1593 nla.obj-1037567 +1592 nla.obj-1037567 +1591 nla.obj-1037567 +1612 nla.obj-1037567 +1613 nla.obj-1037567 +1614 nla.obj-1037567 +1615 nla.obj-1037567 +1638 nla.obj-1037567 +1637 
nla.obj-1037567 +1636 nla.obj-1037567 +1635 nla.obj-1037567 +1634 nla.obj-1037567 +1633 nla.obj-1037567 +1632 nla.obj-1037567 +1631 nla.obj-1037567 +1630 nla.obj-1037567 +1629 nla.obj-1037567 +1590 nla.obj-1037567 +1628 nla.obj-1037567 +1626 nla.obj-1037567 +1625 nla.obj-1037567 +1624 nla.obj-1037567 +1623 nla.obj-1037567 +1622 nla.obj-1037567 +1621 nla.obj-1037567 +1620 nla.obj-1037567 +1619 nla.obj-1037567 +1618 nla.obj-1037567 +1617 nla.obj-1037567 +1627 nla.obj-1037567 +1639 nla.obj-1037567 +1589 nla.obj-1037567 +1587 nla.obj-1037567 +1560 nla.obj-1037567 +1559 nla.obj-1037567 +1558 nla.obj-1037567 +1557 nla.obj-1037567 +1556 nla.obj-1037567 +1555 nla.obj-1037567 +1554 nla.obj-1037567 +1553 nla.obj-1037567 +1552 nla.obj-1037567 +1551 nla.obj-1037567 +1550 nla.obj-1037567 +1549 nla.obj-1037567 +1548 nla.obj-1037567 +1547 nla.obj-1037567 +1546 nla.obj-1037567 +1545 nla.obj-1037567 +1544 nla.obj-1037567 +1543 nla.obj-1037567 +1542 nla.obj-1037567 +1541 nla.obj-1037567 +1540 nla.obj-1037567 +1561 nla.obj-1037567 +1562 nla.obj-1037567 +1563 nla.obj-1037567 +1564 nla.obj-1037567 +1586 nla.obj-1037567 +1585 nla.obj-1037567 +1584 nla.obj-1037567 +1583 nla.obj-1037567 +1582 nla.obj-1037567 +1581 nla.obj-1037567 +1580 nla.obj-1037567 +1579 nla.obj-1037567 +1578 nla.obj-1037567 +1577 nla.obj-1037567 +1588 nla.obj-1037567 +1576 nla.obj-1037567 +1574 nla.obj-1037567 +1573 nla.obj-1037567 +1572 nla.obj-1037567 +1571 nla.obj-1037567 +1570 nla.obj-1037567 +1569 nla.obj-1037567 +1568 nla.obj-1037567 +1567 nla.obj-1037567 +1566 nla.obj-1037567 +1565 nla.obj-1037567 +1575 nla.obj-1037567 +1640 nla.obj-1037567 +1641 nla.obj-1037567 +1642 nla.obj-1037567 +1711 nla.obj-1037567 +1712 nla.obj-1037567 +1713 nla.obj-1037567 +1714 nla.obj-1037567 +1715 nla.obj-1037567 +1716 nla.obj-1037567 +1717 nla.obj-1037567 +1718 nla.obj-1037567 +1719 nla.obj-1037567 +1720 nla.obj-1037567 +1721 nla.obj-1037567 +1722 nla.obj-1037567 +1723 nla.obj-1037567 +1724 nla.obj-1037567 +1725 nla.obj-1037567 
+1726 nla.obj-1037567 +1727 nla.obj-1037567 +1728 nla.obj-1037567 +1729 nla.obj-1037567 +1730 nla.obj-1037567 +1731 nla.obj-1037567 +1710 nla.obj-1037567 +1709 nla.obj-1037567 +1708 nla.obj-1037567 +1707 nla.obj-1037567 +1685 nla.obj-1037567 +1686 nla.obj-1037567 +1687 nla.obj-1037567 +1688 nla.obj-1037567 +1689 nla.obj-1037567 +1690 nla.obj-1037567 +1691 nla.obj-1037567 +1692 nla.obj-1037567 +1693 nla.obj-1037567 +1694 nla.obj-1037567 +1732 nla.obj-1037567 +1695 nla.obj-1037567 +1697 nla.obj-1037567 +1698 nla.obj-1037567 +1699 nla.obj-1037567 +1700 nla.obj-1037567 +1701 nla.obj-1037567 +1702 nla.obj-1037567 +1703 nla.obj-1037567 +1704 nla.obj-1037567 +1705 nla.obj-1037567 +1706 nla.obj-1037567 +1696 nla.obj-1037567 +1733 nla.obj-1037567 +1734 nla.obj-1037567 +1735 nla.obj-1037567 +1664 nla.obj-1037567 +1663 nla.obj-1037567 +1662 nla.obj-1037567 +1661 nla.obj-1037567 +1660 nla.obj-1037567 +1659 nla.obj-1037567 +1658 nla.obj-1037567 +1657 nla.obj-1037567 +1656 nla.obj-1037567 +1655 nla.obj-1037567 +1665 nla.obj-1037567 +1654 nla.obj-1037567 +1652 nla.obj-1037567 +1651 nla.obj-1037567 +1650 nla.obj-1037567 +1649 nla.obj-1037567 +1648 nla.obj-1037567 +1647 nla.obj-1037567 +1646 nla.obj-1037567 +1645 nla.obj-1037567 +1644 nla.obj-1037567 +1643 nla.obj-1037567 +1653 nla.obj-1037567 +1539 nla.obj-1037567 +1666 nla.obj-1037567 +1668 nla.obj-1037567 +1736 nla.obj-1037567 +1737 nla.obj-1037567 +1738 nla.obj-1037567 +1739 nla.obj-1037567 +1740 nla.obj-1037567 +1741 nla.obj-1037567 +1742 nla.obj-1037567 +1743 nla.obj-1037567 +1682 nla.obj-1037567 +1681 nla.obj-1037567 +1667 nla.obj-1037567 +1680 nla.obj-1037567 +1678 nla.obj-1037567 +1677 nla.obj-1037567 +1676 nla.obj-1037567 +1675 nla.obj-1037567 +1674 nla.obj-1037567 +1673 nla.obj-1037567 +1672 nla.obj-1037567 +1671 nla.obj-1037567 + +numeric column stats: +pages: min=1 max=3290 mean=51.939808185870596 +non-numeric values in text_download_url: 37015 + text_download_url +0 
https://trove.nla.gov.au/nla.obj-8447243/download?downloadOption=ocr&firstPage=0&lastPage=47 +1 https://trove.nla.gov.au/nla.obj-8452230/download?downloadOption=ocr&firstPage=0&lastPage=39 +2 https://trove.nla.gov.au/nla.obj-8458012/download?downloadOption=ocr&firstPage=0&lastPage=39 +3 https://trove.nla.gov.au/nla.obj-8465575/download?downloadOption=ocr&firstPage=0&lastPage=39 +4 https://trove.nla.gov.au/nla.obj-8469716/download?downloadOption=ocr&firstPage=0&lastPage=35 +5 https://trove.nla.gov.au/nla.obj-8473393/download?downloadOption=ocr&firstPage=0&lastPage=37 +6 https://trove.nla.gov.au/nla.obj-8477277/download?downloadOption=ocr&firstPage=0&lastPage=35 +7 https://trove.nla.gov.au/nla.obj-8480954/download?downloadOption=ocr&firstPage=0&lastPage=37 +8 https://trove.nla.gov.au/nla.obj-9139951/download?downloadOption=ocr&firstPage=0&lastPage=37 +9 https://trove.nla.gov.au/nla.obj-9161241/download?downloadOption=ocr&firstPage=0&lastPage=35 +10 https://trove.nla.gov.au/nla.obj-15922909/download?downloadOption=ocr&firstPage=0&lastPage=37 +11 https://trove.nla.gov.au/nla.obj-9557231/download?downloadOption=ocr&firstPage=0&lastPage=35 +12 https://trove.nla.gov.au/nla.obj-9878439/download?downloadOption=ocr&firstPage=0&lastPage=41 +13 https://trove.nla.gov.au/nla.obj-3041640304/download?downloadOption=ocr&firstPage=0&lastPage=343 +14 https://trove.nla.gov.au/nla.obj-3008849314/download?downloadOption=ocr&firstPage=0&lastPage=159 +15 https://trove.nla.gov.au/nla.obj-15967449/download?downloadOption=ocr&firstPage=0&lastPage=16 +16 https://trove.nla.gov.au/nla.obj-170047078/download?downloadOption=ocr&firstPage=0&lastPage=51 +17 https://trove.nla.gov.au/nla.obj-171920359/download?downloadOption=ocr&firstPage=0&lastPage=50 +18 https://trove.nla.gov.au/nla.obj-181287479/download?downloadOption=ocr&firstPage=0&lastPage=43 +19 https://trove.nla.gov.au/nla.obj-181292335/download?downloadOption=ocr&firstPage=0&lastPage=35 + +date parsing checks: +unparseable dates in date: 0 
+CLEANED: /Users/nicky/Projects/CentralLibraryData/periodical-issues.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/periodical-issues.csv (dupes=0, nulls=0) +validation passed: periodical-issues.csv diff --git a/periodical-titles.csv.report.txt b/periodical-titles.csv.report.txt new file mode 100644 index 0000000..838af87 --- /dev/null +++ b/periodical-titles.csv.report.txt @@ -0,0 +1,83 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/periodical-titles.csv === +rows: 908 +columns: 15 +column names and dtypes: +id str +title str +description str +publisher str +trove_url str +download_text str +issue_count int64 +start_date str +end_date str +start_year float64 +end_year float64 +extent str +place str +issn str +catalogue_url str + +first 5 rows: + id title description publisher trove_url download_text issue_count start_date end_date start_year end_year extent place issn catalogue_url + nla.obj-8423556 "Coo-ee!" : the journal of the Bishops Knoll Hospital, Bristol. NaN Partridge & Love Ltd. https://nla.gov.au/nla.obj-8423556 https://trove-journals.s3.ap-southeast-2.amazonaws.com/coo-ee-the-journal-of-the-bishops-knoll-hospital-b-nla.obj-8423556.zip 13 1916-01-01 1917-10-20 1916.0 1917.0 1 v. ; ill. ; 25 cm. NaN NaN https://nla.gov.au/nla.cat-vn377167 +nla.obj-2998991958 (Ross's) Hobart Town almanack, and Van Diemen's Land annual. NaN Printed by James Ross https://nla.gov.au/nla.obj-2998991958 https://trove-journals.s3.ap-southeast-2.amazonaws.com/ross-s-hobart-town-almanack-and-van-diemen-s-land--nla.obj-2998991958.zip 1 1835-01-01 1835-01-01 1835.0 1835.0 2 volumes : illustrations ; 18 cm. Tasmania NaN https://nla.gov.au/nla.cat-vn610781 +nla.obj-2998997061 (Ross's) Van Diemen's Land annual and Hobart Town almanack. 
NaN Printed by James Ross https://nla.gov.au/nla.obj-2998997061 https://trove-journals.s3.ap-southeast-2.amazonaws.com/ross-s-van-diemen-s-land-annual-and-hobart-town-al-nla.obj-2998997061.zip 1 1834-01-01 1934-01-01 1934.0 1934.0 1 volumes : illustrations ; 18 cm. Tasmania NaN https://nla.gov.au/nla.cat-vn2133206 + nla.obj-15956697 14th Company magazine. NaN 14th Company. https://nla.gov.au/nla.obj-15956697 https://trove-journals.s3.ap-southeast-2.amazonaws.com/14th-company-magazine-nla.obj-15956697.zip 1 0191-01-01 1918-06-14 1918.0 1918.0 v. ; 25 cm. Australia NaN https://nla.gov.au/nla.cat-vn585007 + nla.obj-250818955 20 favourite & war-time camp songs [music]. NaN Sterling Music https://nla.gov.au/nla.obj-250818955 https://trove-journals.s3.ap-southeast-2.amazonaws.com/20-favourite-war-time-camp-songs-music-nla.obj-250818955.zip 7 1940-01-01 NaN NaN NaN 7 v. of music ; 29 cm. NaN NaN https://nla.gov.au/nla.cat-vn2926496 + +null report (sorted): + null_count null_pct +description 891 98.127753 +issn 801 88.215859 +place 294 32.378855 +end_date 196 21.585903 +start_year 173 19.052863 +end_year 173 19.052863 +download_text 109 12.004405 +extent 22 2.422907 +publisher 12 1.321586 +start_date 9 0.991189 +trove_url 5 0.550661 +catalogue_url 1 0.110132 +id 0 0.000000 +title 0 0.000000 +issue_count 0 0.000000 + +exact duplicate rows: 0 + +numeric column stats: +non-numeric values in download_text: 799 + download_text +0 https://trove-journals.s3.ap-southeast-2.amazonaws.com/coo-ee-the-journal-of-the-bishops-knoll-hospital-b-nla.obj-8423556.zip +1 https://trove-journals.s3.ap-southeast-2.amazonaws.com/ross-s-hobart-town-almanack-and-van-diemen-s-land--nla.obj-2998991958.zip +2 https://trove-journals.s3.ap-southeast-2.amazonaws.com/ross-s-van-diemen-s-land-annual-and-hobart-town-al-nla.obj-2998997061.zip +3 https://trove-journals.s3.ap-southeast-2.amazonaws.com/14th-company-magazine-nla.obj-15956697.zip +4 
https://trove-journals.s3.ap-southeast-2.amazonaws.com/20-favourite-war-time-camp-songs-music-nla.obj-250818955.zip +5 https://trove-journals.s3.ap-southeast-2.amazonaws.com/24th-battalion-journal-nla.obj-24207812.zip +6 https://trove-journals.s3.ap-southeast-2.amazonaws.com/a-careta-nla.obj-320275568.zip +8 https://trove-journals.s3.ap-southeast-2.amazonaws.com/a-short-report-on-the-evidence-in-the-case-of-regi-nla.obj-3089884707.zip +9 https://trove-journals.s3.ap-southeast-2.amazonaws.com/a-h-massina-co-s-weather-almanac-and-general-guide-nla.obj-2905264494.zip +10 https://trove-journals.s3.ap-southeast-2.amazonaws.com/a-h-massina-co-s-weather-almanac-for-with-wall-cal-nla.obj-3086457649.zip +11 https://trove-journals.s3.ap-southeast-2.amazonaws.com/abc-weekly-nla.obj-890736639.zip +12 https://trove-journals.s3.ap-southeast-2.amazonaws.com/anare-reports-series-b-nla.obj-254636277.zip +13 https://trove-journals.s3.ap-southeast-2.amazonaws.com/anzhes-journal-nla.obj-2836256024.zip +14 https://trove-journals.s3.ap-southeast-2.amazonaws.com/abstract-of-the-report-of-the-london-missionary-so-nla.obj-288413164.zip +16 https://trove-journals.s3.ap-southeast-2.amazonaws.com/action-front-journal-of-the-2-2-field-regiment-nla.obj-321169907.zip +17 https://trove-journals.s3.ap-southeast-2.amazonaws.com/activities-nla.obj-806514451.zip +18 https://trove-journals.s3.ap-southeast-2.amazonaws.com/acts-of-parliament-of-victoria-nla.obj-54185597.zip +19 https://trove-journals.s3.ap-southeast-2.amazonaws.com/acts-of-the-parliament-passed-in-the-first-year-of-nla.obj-54127737.zip +20 https://trove-journals.s3.ap-southeast-2.amazonaws.com/acts-of-the-parliament-of-south-australia-nla.obj-53890906.zip +21 https://trove-journals.s3.ap-southeast-2.amazonaws.com/acts-of-the-victorian-parliament-nla.obj-55252550.zip +issue_count: min=0 max=5418 mean=41.02092511013216 +start_year: min=196.0 max=2007.0 mean=1906.9795918367347 +end_year: min=1819.0 max=2022.0 mean=1919.7034013605441 + 
+date parsing checks: +unparseable dates in start_date: 0 +unparseable dates in end_date: 0 +unparseable dates in start_year: 0 +unparseable dates in end_year: 0 +CLEANED: /Users/nicky/Projects/CentralLibraryData/periodical-titles.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/periodical-titles.csv (dupes=0, nulls=0) +validation passed: periodical-titles.csv diff --git a/periodicals.db b/periodicals.db new file mode 100644 index 0000000..e6d5834 Binary files /dev/null and b/periodicals.db differ diff --git a/pg_catalog.csv.report.txt b/pg_catalog.csv.report.txt new file mode 100644 index 0000000..a6b7e07 --- /dev/null +++ b/pg_catalog.csv.report.txt @@ -0,0 +1,41 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/pg_catalog.csv === +rows: 76135 +columns: 9 +column names and dtypes: +Text# int64 +Type str +Issued str +Title str +Language str +Authors str +Subjects str +LoCC str +Bookshelves str + +first 5 rows: + Text# Type Issued Title Language Authors Subjects LoCC Bookshelves + 1 Text 1971-12-01 The Declaration of Independence of the United States of America en Jefferson, Thomas, 1743-1826 United States -- History -- Revolution, 1775-1783 -- Sources; United States. Declaration of Independence E201; JK Politics; American Revolutionary War; United States Law; Browsing: History - American; Browsing: History - Warfare; Browsing: Politics + 2 Text 1972-12-01 The United States Bill of Rights\r\nThe Ten Original Amendments to the Constitution of the United States en United States Civil rights -- United States -- Sources; United States. Constitution. 1st-10th Amendments JK; KF Politics; American Revolutionary War; United States Law; Browsing: History - American; Browsing: Law & Criminology; Browsing: Politics + 3 Text 1973-11-01 John F. Kennedy's Inaugural Address en Kennedy, John F. 
(John Fitzgerald), 1917-1963 United States -- Foreign relations -- 1961-1963; Presidents -- United States -- Inaugural addresses E838 Browsing: History - American; Browsing: Politics + 4 Text 1973-11-01 Lincoln's Gettysburg Address\r\nGiven November 19, 1863 on the battlefield near Gettysburg, Pennsylvania, USA en Lincoln, Abraham, 1809-1865 Consecration of cemeteries -- Pennsylvania -- Gettysburg; Soldiers' National Cemetery (Gettysburg, Pa.); Lincoln, Abraham, 1809-1865. Gettysburg address E456 US Civil War; Browsing: History - American; Browsing: Politics + 5 Text 1975-12-01 The United States Constitution en United States United States -- Politics and government -- 1783-1789 -- Sources; United States. Constitution JK; KF United States; Politics; American Revolutionary War; United States Law; Browsing: History - American; Browsing: Law & Criminology; Browsing: Politics + +null report (sorted): + null_count null_pct +LoCC 276 0.362514 +Bookshelves 273 0.358574 +Authors 165 0.216720 +Subjects 69 0.090628 +Text# 0 0.000000 +Type 0 0.000000 +Issued 0 0.000000 +Title 0 0.000000 +Language 0 0.000000 + +exact duplicate rows: 0 + +numeric column stats: +Text#: min=1 max=76314 mean=38146.252761542 +CLEANED: /Users/nicky/Projects/CentralLibraryData/pg_catalog.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/pg_catalog.csv (dupes=5, nulls=0) +validation passed: pg_catalog.csv diff --git a/ratings.csv.report.txt b/ratings.csv.report.txt new file mode 100644 index 0000000..7d49938 --- /dev/null +++ b/ratings.csv.report.txt @@ -0,0 +1 @@ +CHUNK: ratings.csv rows_in_chunk=200000 dupes=0 nulls=0 [offset=5034555] diff --git a/ratings_small.csv.report.txt b/ratings_small.csv.report.txt new file mode 100644 index 0000000..464dfe0 --- /dev/null +++ b/ratings_small.csv.report.txt @@ -0,0 +1 @@ +UNCHANGED: /Users/nicky/Projects/CentralLibraryData/ratings_small.csv diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..302cc31 --- /dev/null +++ 
b/requirements.in @@ -0,0 +1,8 @@ +frictionless +rdflib>=7.* +rocrate +# To avoid trying to install rdflib-json +jupyterlab +pandas +humanize +pytest \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..cccd077 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,399 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +aiohttp==3.9.1 + # via tuspy +aiosignal==1.3.1 + # via aiohttp +annotated-types==0.6.0 + # via pydantic +anyio==4.2.0 + # via jupyter-server +arcp==0.2.1 + # via rocrate +argon2-cffi==23.1.0 + # via jupyter-server +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.3.0 + # via isoduration +asttokens==2.4.1 + # via stack-data +async-lru==2.0.4 + # via jupyterlab +async-timeout==4.0.3 + # via aiohttp +attrs==23.2.0 + # via + # aiohttp + # frictionless + # jsonschema +babel==2.14.0 + # via jupyterlab-server +beautifulsoup4==4.12.2 + # via nbconvert +bioblend==1.2.0 + # via gxformat2 +bleach==6.1.0 + # via nbconvert +cachecontrol[filecache]==0.13.1 + # via schema-salad +certifi==2023.11.17 + # via requests +cffi==1.16.0 + # via argon2-cffi-bindings +chardet==5.2.0 + # via frictionless +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via + # rocrate + # typer +colorama==0.4.6 + # via typer +comm==0.2.1 + # via ipykernel +debugpy==1.8.0 + # via ipykernel +decorator==5.1.1 + # via ipython +defusedxml==0.7.1 + # via nbconvert +exceptiongroup==1.2.0 + # via + # anyio + # ipython +executing==2.0.1 + # via stack-data +fastjsonschema==2.19.1 + # via nbformat +filelock==3.13.1 + # via cachecontrol +fqdn==1.5.1 + # via jsonschema +frictionless==5.16.0 + # via -r requirements.in +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal +galaxy2cwl==0.1.4 + # via rocrate +gxformat2==0.18.0 + # via galaxy2cwl +humanize==4.9.0 + # via + # -r requirements.in + # frictionless +idna==3.6 + # via + # anyio + # jsonschema 
+ # requests + # yarl +ipykernel==6.28.0 + # via jupyterlab +ipython==8.19.0 + # via ipykernel +isodate==0.6.1 + # via + # frictionless + # rdflib +isoduration==20.11.0 + # via jsonschema +jedi==0.19.1 + # via ipython +jinja2==3.1.2 + # via + # frictionless + # jupyter-server + # jupyterlab + # jupyterlab-server + # nbconvert + # rocrate +json5==0.9.14 + # via jupyterlab-server +jsonpointer==2.4 + # via jsonschema +jsonschema[format-nongpl]==4.17.3 + # via + # frictionless + # jupyter-events + # jupyterlab-server + # nbformat +jupyter-client==8.6.0 + # via + # ipykernel + # jupyter-server + # nbclient +jupyter-core==5.7.0 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # nbclient + # nbconvert + # nbformat +jupyter-events==0.6.3 + # via jupyter-server +jupyter-lsp==2.2.1 + # via jupyterlab +jupyter-server==2.10.0 + # via + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook-shim +jupyter-server-terminals==0.5.1 + # via jupyter-server +jupyterlab==4.0.10 + # via -r requirements.in +jupyterlab-pygments==0.3.0 + # via nbconvert +jupyterlab-server==2.24.0 + # via jupyterlab +markdown-it-py==3.0.0 + # via rich +marko==2.0.2 + # via frictionless +markupsafe==2.1.3 + # via + # jinja2 + # nbconvert +matplotlib-inline==0.1.6 + # via + # ipykernel + # ipython +mdurl==0.1.2 + # via markdown-it-py +mistune==2.0.5 + # via + # nbconvert + # schema-salad +msgpack==1.0.7 + # via cachecontrol +multidict==6.0.4 + # via + # aiohttp + # yarl +mypy-extensions==1.0.0 + # via schema-salad +nbclient==0.9.0 + # via nbconvert +nbconvert==7.14.0 + # via jupyter-server +nbformat==5.9.2 + # via + # jupyter-server + # nbclient + # nbconvert +nest-asyncio==1.5.8 + # via ipykernel +notebook-shim==0.2.3 + # via jupyterlab +numpy==1.26.3 + # via pandas +overrides==7.4.0 + # via jupyter-server +packaging==23.2 + # via + # ipykernel + # jupyter-server + # jupyterlab + # jupyterlab-server + # nbconvert +pandas==2.1.4 + # via -r requirements.in 
+pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi +petl==1.7.14 + # via frictionless +pexpect==4.9.0 + # via ipython +platformdirs==4.1.0 + # via jupyter-core +prometheus-client==0.19.0 + # via jupyter-server +prompt-toolkit==3.0.43 + # via ipython +psutil==5.9.7 + # via ipykernel +ptyprocess==0.7.0 + # via + # pexpect + # terminado +pure-eval==0.2.2 + # via stack-data +pycparser==2.21 + # via cffi +pydantic==2.5.3 + # via frictionless +pydantic-core==2.14.6 + # via pydantic +pygments==2.17.2 + # via + # ipython + # nbconvert + # rich +pyparsing==3.1.1 + # via rdflib +pyrsistent==0.20.0 + # via jsonschema +python-dateutil==2.8.2 + # via + # arrow + # frictionless + # jupyter-client + # pandas + # rocrate +python-json-logger==2.0.7 + # via jupyter-events +python-slugify==8.0.1 + # via frictionless +pytz==2023.3.post1 + # via pandas +pyyaml==6.0.1 + # via + # frictionless + # galaxy2cwl + # gxformat2 + # jupyter-events +pyzmq==25.1.2 + # via + # ipykernel + # jupyter-client + # jupyter-server +rdflib==7.0.0 + # via + # -r requirements.in + # schema-salad +requests==2.31.0 + # via + # bioblend + # cachecontrol + # frictionless + # jupyterlab-server + # requests-toolbelt + # rocrate + # schema-salad + # tuspy +requests-toolbelt==1.0.0 + # via bioblend +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events +rfc3986==2.0.0 + # via frictionless +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rich==13.7.0 + # via typer +rocrate==0.9.0 + # via -r requirements.in +ruamel-yaml==0.18.5 + # via schema-salad +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +schema-salad==8.5.20240102191335 + # via gxformat2 +send2trash==1.8.2 + # via jupyter-server +shellingham==1.5.4 + # via typer +simpleeval==0.9.13 + # via frictionless +six==1.16.0 + # via + # asttokens + # bleach + # isodate + # python-dateutil + # rfc3339-validator +sniffio==1.3.0 + # via anyio +soupsieve==2.5 + # via beautifulsoup4 +stack-data==0.6.3 + # via ipython 
+stringcase==1.2.0 + # via frictionless +tabulate==0.9.0 + # via frictionless +terminado==0.18.0 + # via + # jupyter-server + # jupyter-server-terminals +text-unidecode==1.3 + # via python-slugify +tinycss2==1.2.1 + # via nbconvert +tinydb==4.8.0 + # via tuspy +tomli==2.0.1 + # via jupyterlab +tornado==6.4 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # terminado +traitlets==5.14.1 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # jupyter-events + # jupyter-server + # jupyterlab + # matplotlib-inline + # nbclient + # nbconvert + # nbformat +tuspy==1.0.3 + # via bioblend +typer[all]==0.9.0 + # via frictionless +types-python-dateutil==2.8.19.20240106 + # via arrow +typing-extensions==4.9.0 + # via + # anyio + # async-lru + # bioblend + # frictionless + # pydantic + # pydantic-core + # typer +tzdata==2023.4 + # via pandas +uri-template==1.3.0 + # via jsonschema +urllib3==2.1.0 + # via requests +validators==0.22.0 + # via frictionless +wcwidth==0.2.13 + # via prompt-toolkit +webcolors==1.13 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.7.0 + # via jupyter-server +yarl==1.9.4 + # via aiohttp diff --git a/ro-crate-metadata.json.report.txt b/ro-crate-metadata.json.report.txt new file mode 100644 index 0000000..de57384 --- /dev/null +++ b/ro-crate-metadata.json.report.txt @@ -0,0 +1,8 @@ +CLEANED: /Users/nicky/Projects/CentralLibraryData/ro-crate-metadata.json -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/ro-crate-metadata.json (dupes=0, nulls=0) + +=== REPORT: /Users/nicky/Projects/CentralLibraryData/ro-crate-metadata.json === +file type: .json +status: changed +output: /Users/nicky/Projects/CentralLibraryData/cleaned_data/ro-crate-metadata.json +duplicates removed: 0 +null rows removed: 0 diff --git a/sample_data_java.zip b/sample_data_java.zip new file mode 100644 index 0000000..204f6f1 Binary files /dev/null and b/sample_data_java.zip differ diff 
--git a/tcc_ceds_music.csv.report.txt b/tcc_ceds_music.csv.report.txt new file mode 100644 index 0000000..a178084 --- /dev/null +++ b/tcc_ceds_music.csv.report.txt @@ -0,0 +1,85 @@ + +=== PROFILE: /Users/nicky/Projects/CentralLibraryData/tcc_ceds_music.csv === +rows: 28372 +columns: 31 +column names and dtypes: +Unnamed: 0 int64 +artist_name str +track_name str +release_date int64 +genre str +lyrics str +len int64 +dating float64 +violence float64 +world/life float64 +night/time float64 +shake the audience float64 +family/gospel float64 +romantic float64 +communication float64 +obscene float64 +music float64 +movement/places float64 +light/visual perceptions float64 +family/spiritual float64 +like/girls float64 +sadness float64 +feelings float64 +danceability float64 +loudness float64 +acousticness float64 +instrumentalness float64 +valence float64 +energy float64 +topic str +age float64 + +first 5 rows: + Unnamed: 0 artist_name track_name release_date genre lyrics len dating violence world/life night/time shake the audience family/gospel romantic communication obscene music movement/places light/visual perceptions family/spiritual like/girls sadness feelings danceability loudness acousticness instrumentalness valence energy topic age + 0 mukesh mohabbat bhi jhoothi 1950 pop hold time feel break feel untrue convince speak voice tear try hold hurt try forgive okay play break string feel heart want feel tell real truth hurt lie worse anymore little turn dust play house ruin run leave save like chase train late late tear try hold hurt try forgive okay play break string feel heart want feel tell real truth hurt lie worse anymore little run leave save like chase train know late late play break string feel heart want feel tell real truth hurt lie worse anymore little know little hold time feel 95 0.000598 0.063746 0.000598 0.000598 0.000598 0.048857 0.017104 0.263751 0.000598 0.039288 0.000598 0.000598 0.000598 0.000598 0.380299 0.117175 0.357739 0.454119 0.997992 
0.901822 0.339448 0.137110 sadness 1.0 + 4 frankie laine i believe 1950 pop believe drop rain fall grow believe darkest night candle glow believe go astray come believe believe believe smallest prayer hear believe great hear word time hear bear baby touch leaf believe believe believe lord heaven guide sin hide believe calvary die pierce believe death rise meet heaven loud amen know believe 51 0.035537 0.096777 0.443435 0.001284 0.001284 0.027007 0.001284 0.001284 0.001284 0.118034 0.001284 0.212681 0.051124 0.001284 0.001284 0.001284 0.331745 0.647540 0.954819 0.000002 0.325021 0.263240 world/life 1.0 + 6 johnnie ray cry 1950 pop sweetheart send letter goodbye secret feel better wake dream think real false emotions feel heartaches hang long blue get bluer song remember cloudy hair 24 0.002770 0.002770 0.002770 0.002770 0.002770 0.002770 0.158564 0.250668 0.002770 0.323794 0.002770 0.002770 0.002770 0.002770 0.002770 0.225422 0.456298 0.585288 0.840361 0.000000 0.351814 0.139112 music 1.0 + 10 pérez prado patricia 1950 pop kiss lips want stroll charm mambo chacha meringue heaven arm japan brag geisha care long uncle eye starry sort gleam like million dollar dream come true everybody wish steal heart away guess try eye starry sort gleam like million dollar dream come true kiss lips want stroll charm japan brag geisha care long uncle 54 0.048249 0.001548 0.001548 0.001548 0.021500 0.001548 0.411536 0.001548 0.001548 0.001548 0.129250 0.001548 0.001548 0.081132 0.225889 0.001548 0.686992 0.744404 0.083935 0.199393 0.775350 0.743736 romantic 1.0 + 12 giorgos papadopoulos apopse eida oneiro 1950 pop till darling till matter know till dream live apart know hearts till world free oceans cross mountains climb know gain loss pray loss time till dream till memory till hold till oceans cross mountains climb know gain loss pray loss time till dream till memory till hold till 48 0.001350 0.001350 0.417772 0.001350 0.001350 0.001350 0.463430 0.001350 0.001350 0.001350 0.001350 
0.001350 0.029755 0.001350 0.068800 0.001350 0.291671 0.646489 0.975904 0.000246 0.597073 0.394375 romantic 1.0 + +null report (sorted): + null_count null_pct +Unnamed: 0 0 0.0 +artist_name 0 0.0 +track_name 0 0.0 +release_date 0 0.0 +genre 0 0.0 +lyrics 0 0.0 +len 0 0.0 +dating 0 0.0 +violence 0 0.0 +world/life 0 0.0 +night/time 0 0.0 +shake the audience 0 0.0 +family/gospel 0 0.0 +romantic 0 0.0 +communication 0 0.0 +obscene 0 0.0 +music 0 0.0 +movement/places 0 0.0 +light/visual perceptions 0 0.0 +family/spiritual 0 0.0 +like/girls 0 0.0 +sadness 0 0.0 +feelings 0 0.0 +danceability 0 0.0 +loudness 0 0.0 +acousticness 0 0.0 +instrumentalness 0 0.0 +valence 0 0.0 +energy 0 0.0 +topic 0 0.0 +age 0 0.0 + +exact duplicate rows: 0 + +date parsing checks: +unparseable dates in release_date: 0 +CLEANED: /Users/nicky/Projects/CentralLibraryData/tcc_ceds_music.csv -> /Users/nicky/Projects/CentralLibraryData/cleaned_data/tcc_ceds_music.csv (dupes=0, nulls=0) +validation passed: tcc_ceds_music.csv diff --git a/test_sample_contract.py b/test_sample_contract.py new file mode 100644 index 0000000..a596798 --- /dev/null +++ b/test_sample_contract.py @@ -0,0 +1,212 @@ +"""Pytest contract tests for golden sample business rules. + +Mirrors the Java GoldenSampleContractTest so both teams assert +the identical contract. 
Covers: + Rule 1 – referential integrity (issue.title_id in title.id) + Rule 2 – issue_count matches linked rows + Rule 3 – title start_date <= end_date + Rule 4 – title start_year <= end_year + Rule 5 – issue.date within title date range + Rule 6 – issue.pages is a positive integer +""" + +from __future__ import annotations + +import csv +import io +from pathlib import Path + +import pytest + +from validate_sample import validate + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _csv(headers: list[str], rows: list[list[str]]) -> io.StringIO: + buf = io.StringIO() + writer = csv.writer(buf) + writer.writerow(headers) + writer.writerows(rows) + buf.seek(0) + return buf + + +def _write_csv(tmp_path: Path, name: str, headers: list[str], rows: list[list[str]]) -> Path: + p = tmp_path / name + p.write_text(_csv(headers, rows).getvalue(), encoding="utf-8") + return p + + +TITLE_HEADERS = ["id", "title", "description", "publisher", "trove_url", + "download_text", "issue_count", "start_date", "end_date", + "start_year", "end_year", "extent", "place", "issn", "catalogue_url"] + +ISSUE_HEADERS = ["id", "title_id", "title", "description", "date", "url", + "pages", "text_download_url"] + +GOOD_TITLE = [ + "nla.obj-9000000001", "Sample Gazette", "Demo", "Publisher", + "https://nla.gov.au/nla.obj-9000000001", "https://example.org/sample.zip", + "1", "2020-01-15", "2020-01-15", "2020", "2020", + "1 volume", "Australia", "1234-5678", "https://nla.gov.au/nla.cat-vn1", +] + +GOOD_ISSUE = [ + "nla.obj-9000001001", "nla.obj-9000000001", "Sample Gazette", + "Volume 1 Number 1", "2020-01-15", + "https://nla.gov.au/nla.obj-9000001001", "24", + "https://trove.nla.gov.au/nla.obj-9000001001/download", +] + + +# --------------------------------------------------------------------------- +# Canonical golden sample — must be violation-free +# 
--------------------------------------------------------------------------- + +class TestGoldenSampleFiles: + """Load the actual files shipped in both package variants.""" + + PACKAGES = [ + Path(__file__).parent / "java_team_package_small", + Path(__file__).parent / "java_team_package_small_cleaned", + ] + + @pytest.mark.parametrize("pkg_dir", PACKAGES, ids=["small", "small_cleaned"]) + def test_no_violations(self, pkg_dir: Path) -> None: + titles = pkg_dir / "valid-periodical-titles-sample.csv" + issues = pkg_dir / "valid-periodical-issues-sample.csv" + assert titles.exists(), f"Missing {titles}" + assert issues.exists(), f"Missing {issues}" + + violations = validate(titles, issues) + assert violations == [], "Violations found:\n" + "\n".join(str(v) for v in violations) + + +# --------------------------------------------------------------------------- +# Rule-specific unit tests using in-memory CSVs +# --------------------------------------------------------------------------- + +class TestRule1ReferentialIntegrity: + def test_matching_title_id_passes(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def test_unknown_title_id_fails(self, tmp_path: Path) -> None: + bad_issue = GOOD_ISSUE[:1] + ["nla.obj-UNKNOWN"] + GOOD_ISSUE[2:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [bad_issue]) + violations = validate(titles, issues) + assert any("rule1" in v.rule for v in violations) + + +class TestRule2IssueCount: + def test_matching_count_passes(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def test_mismatched_count_fails(self, 
tmp_path: Path) -> None: + # title says issue_count=2 but only 1 issue row + bad_title = GOOD_TITLE[:6] + ["2"] + GOOD_TITLE[7:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [bad_title]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + violations = validate(titles, issues) + assert any("rule2" in v.rule for v in violations) + + def test_zero_count_with_no_issues_passes(self, tmp_path: Path) -> None: + no_issues_title = GOOD_TITLE[:6] + ["0"] + GOOD_TITLE[7:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [no_issues_title]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, []) + assert validate(titles, issues) == [] + + +class TestRule3DateOrder: + def test_equal_dates_pass(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def test_start_after_end_fails(self, tmp_path: Path) -> None: + bad_title = GOOD_TITLE[:7] + ["2021-01-01", "2020-01-01"] + GOOD_TITLE[9:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [bad_title]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, []) + violations = validate(titles, issues) + assert any("rule3" in v.rule for v in violations) + + def test_missing_end_date_passes(self, tmp_path: Path) -> None: + open_ended_title = GOOD_TITLE[:8] + [""] + GOOD_TITLE[9:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [open_ended_title]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, []) + assert validate(titles, issues) == [] + + +class TestRule4YearOrder: + def test_equal_years_pass(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def test_start_year_after_end_year_fails(self, 
tmp_path: Path) -> None: + bad_title = GOOD_TITLE[:9] + ["2022", "2019"] + GOOD_TITLE[11:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [bad_title]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, []) + violations = validate(titles, issues) + assert any("rule4" in v.rule for v in violations) + + +class TestRule5IssueDateInRange: + def test_date_on_boundary_passes(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def test_date_before_start_fails(self, tmp_path: Path) -> None: + early_issue = GOOD_ISSUE[:4] + ["2019-12-31"] + GOOD_ISSUE[5:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [early_issue]) + violations = validate(titles, issues) + assert any("rule5" in v.rule for v in violations) + + def test_date_after_end_fails(self, tmp_path: Path) -> None: + late_issue = GOOD_ISSUE[:4] + ["2021-01-01"] + GOOD_ISSUE[5:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [late_issue]) + violations = validate(titles, issues) + assert any("rule5" in v.rule for v in violations) + + def test_missing_issue_date_skipped(self, tmp_path: Path) -> None: + no_date_issue = GOOD_ISSUE[:4] + [""] + GOOD_ISSUE[5:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [no_date_issue]) + assert validate(titles, issues) == [] + + +class TestRule6PagesPositive: + def test_positive_pages_pass(self, tmp_path: Path) -> None: + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [GOOD_ISSUE]) + assert validate(titles, issues) == [] + + def 
test_zero_pages_fails(self, tmp_path: Path) -> None: + zero_pages_issue = GOOD_ISSUE[:6] + ["0"] + GOOD_ISSUE[7:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [zero_pages_issue]) + violations = validate(titles, issues) + assert any("rule6" in v.rule for v in violations) + + def test_negative_pages_fails(self, tmp_path: Path) -> None: + neg_pages_issue = GOOD_ISSUE[:6] + ["-5"] + GOOD_ISSUE[7:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [neg_pages_issue]) + violations = validate(titles, issues) + assert any("rule6" in v.rule for v in violations) + + def test_missing_pages_skipped(self, tmp_path: Path) -> None: + no_pages_issue = GOOD_ISSUE[:6] + [""] + GOOD_ISSUE[7:] + titles = _write_csv(tmp_path, "titles.csv", TITLE_HEADERS, [GOOD_TITLE]) + issues = _write_csv(tmp_path, "issues.csv", ISSUE_HEADERS, [no_pages_issue]) + assert validate(titles, issues) == [] diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py new file mode 100644 index 0000000..cc5d45b --- /dev/null +++ b/tests/test_ingestion.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import pytest + +from export_java_class_data import assert_files_exist, resolve_input_path + + +def test_assert_files_exist_raises_for_missing_file(tmp_path: Path) -> None: + existing = tmp_path / "exists.csv" + existing.write_text("id\n1\n", encoding="utf-8") + + missing = tmp_path / "missing.csv" + with pytest.raises(FileNotFoundError): + assert_files_exist([existing, missing]) + + +def test_resolve_input_path_prefers_cleaned_when_available(tmp_path: Path) -> None: + data_root = tmp_path / "data" + cleaned_root = tmp_path / "cleaned" + data_root.mkdir() + cleaned_root.mkdir() + + (data_root / "pg_catalog.csv").write_text("id\n1\n", encoding="utf-8") + cleaned_file = cleaned_root / "pg_catalog.csv" + cleaned_file.write_text("id\n2\n", 
def test_clean_frame_writes_empty_for_optional_blanks() -> None:
    """The row with a blank required ID is dropped; a blank optional cell reads "empty"."""
    rows = [
        ("1", "Alpha", ""),
        ("2", "Beta", "Sci-Fi"),
        ("", "Gamma", "Drama"),
    ]
    frame = pd.DataFrame(rows, columns=["ID", "Title", "Genre"])

    result = clean_frame(frame, ["ID", "Title"])

    assert len(result) == 2
    assert result.loc[0, "Genre"] == "empty"
    assert result.loc[1, "Genre"] == "Sci-Fi"
# Base JSON-LD properties shared by every Frictionless schema file entity.
# Treated as read-only: per-file values are merged into a copy in main().
schema_props = {
    "@type": ["File"],
    "encodingFormat": "application/json",
    "conformsTo": {"@id": "https://specs.frictionlessdata.io/table-schema/"},
}


def _today():
    """Return the current local date formatted as YYYY-MM-DD."""
    return datetime.datetime.now().strftime("%Y-%m-%d")


def add_update_action(crate, version):
    """
    Adds an UpdateAction to the crate when the repo version is updated.

    The entity id is derived from *version* with dots replaced by underscores.
    """
    action_id = f"create_version_{version.replace('.', '_')}"
    properties = {
        "@type": "UpdateAction",
        "endDate": _today(),
        "name": f"Create version {version}",
        "actionStatus": {"@id": "http://schema.org/CompletedActionStatus"},
    }
    crate.add(ContextEntity(crate, action_id, properties=properties))


def get_create_action(crate, datafile):
    """Return the first CreateAction whose ``result`` entries reference *datafile*.

    *datafile* is compared against each result's ``@id``. Actions without a
    ``result`` property are ignored. Returns None when nothing matches.
    """
    for action in crate.get_by_type("CreateAction"):
        for result in action.properties().get("result", []):
            if result["@id"] == datafile:
                return action
    return None


def check_schema(file_path):
    """Validate *file_path* against its table schema, creating the schema if absent.

    The schema file is ``<stem>-schema.json`` in the current working directory.
    When it exists the data file is validated and failures are printed;
    otherwise a schema is inferred from the data and written out.

    Returns the schema file's (relative) Path.
    """
    schema_name = Path(f"{file_path.with_suffix('').name}-schema.json")
    if schema_name.exists():
        report = Resource(file_path, schema=str(schema_name)).validate()
        if not report.valid:
            print(f"Validation failed: {file_path}")
            print(report)
    else:
        detector = Detector(sample_size=1000)
        # Describe the same path that gets validated above; using only the
        # bare file name would miss data files stored in a subdirectory.
        schema = describe(str(file_path), type="schema", detector=detector)
        schema.to_json(schema_name)
    return schema_name


def find_crate_root() -> Path:
    """Find the nearest directory containing ro-crate-metadata.json.

    Checks the script's directory, its parent, then the current working
    directory, in that order. Raises FileNotFoundError if none qualifies.
    """
    script_dir = Path(__file__).resolve().parent
    for candidate in (script_dir, script_dir.parent, Path.cwd()):
        if (candidate / "ro-crate-metadata.json").exists():
            return candidate
    raise FileNotFoundError("Could not find ro-crate-metadata.json")


def _count_lines(file_path):
    """Return the number of lines in the text file at *file_path*."""
    with file_path.open("r") as handle:
        return sum(1 for _ in handle)


def _update_dataset(crate, datafile, repo_url):
    """Refresh size/date metadata for one local Dataset entity and its CSV schema."""
    data_props = datafile.properties()
    file_path = Path(data_props["name"])
    print(data_props)

    stats = file_path.stat()
    date_modified = datetime.datetime.fromtimestamp(stats.st_mtime).strftime("%Y-%m-%d")
    if file_path.is_dir():
        # For directories "size" is the number of direct children.
        entries = sum(1 for _ in file_path.glob("*"))
        data_props.update({"dateModified": date_modified, "size": entries})
    elif file_path.name.endswith((".zip", ".db")):
        # Binary containers: byte size only, no line count.
        data_props.update({"contentSize": stats.st_size, "dateModified": date_modified})
    else:
        data_props.update(
            {
                "contentSize": stats.st_size,
                "dateModified": date_modified,
                "size": _count_lines(file_path),
            }
        )

    if datafile.get("encodingFormat") == "text/csv":
        schema_name = check_schema(file_path)
        data_props.update({"conformsTo": {"@id": str(schema_name)}})
        # Merge into a fresh dict so the module-level template is not mutated.
        file_schema_props = {
            **schema_props,
            "name": f"Frictionless Table Schema for {data_props['name']} dataset",
            "url": f"{repo_url.strip('/')}/raw/main/{schema_name}",
        }
        crate.add_file(str(schema_name), properties=file_schema_props)


def _update_create_actions(crate):
    """Set each CreateAction's endDate to the newest dateModified among its results."""
    for action in crate.get_by_type("CreateAction"):
        action_dates = []
        for result in action.properties().get("result", []):
            result_file = crate.dereference(result["@id"])
            # External resources are never stamped by _update_dataset, so a
            # result may be unresolved or carry no dateModified; skip those.
            modified = result_file.properties().get("dateModified") if result_file else None
            if modified:
                action_dates.append(modified)
        if action_dates:
            action.properties().update({"endDate": max(action_dates)})


def main(version):
    """Refresh the RO-Crate metadata and, if *version* is given, record the new version.

    The crate is located with find_crate_root(). The working directory is
    switched to the crate root while files are inspected, so relative paths in
    the metadata resolve, and is always restored afterwards.
    """
    crate_root = find_crate_root()
    crate = ROCrate(str(crate_root))
    repo_url = crate.get("./").properties()["url"]
    original_cwd = Path.cwd()

    try:
        os.chdir(crate_root)
        for datafile in crate.get_by_type("Dataset"):
            # Ignore root object and external resources
            if datafile.id != "./" and not datafile.id.startswith("http"):
                _update_dataset(crate, datafile, repo_url)

        _update_create_actions(crate)

        if version:
            crate.update_jsonld(
                {
                    "@id": "./",
                    "version": version,
                    "datePublished": _today(),
                }
            )
            add_update_action(crate, version)
        else:
            crate.update_jsonld(
                {
                    "@id": "./",
                    "datePublished": _today(),
                }
            )
        crate.write("./")
    finally:
        os.chdir(original_cwd)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--version", type=str, help="New version number", required=False
    )
    args = parser.parse_args()
    main(args.version)
+""" + +from __future__ import annotations + +import argparse +import csv +import sys +from datetime import date +from pathlib import Path + + +def _parse_date(value: str) -> date | None: + value = value.strip() + if not value: + return None + return date.fromisoformat(value) + + +def _parse_int(value: str) -> int | None: + value = value.strip() + if not value: + return None + return int(value) + + +def load_csv(path: Path) -> list[dict[str, str]]: + with path.open(encoding="utf-8-sig", newline="") as fh: + return list(csv.DictReader(fh)) + + +class Violation: + def __init__(self, rule: str, detail: str) -> None: + self.rule = rule + self.detail = detail + + def __str__(self) -> str: + return f"[{self.rule}] {self.detail}" + + +def validate(titles_path: Path, issues_path: Path) -> list[Violation]: + titles = load_csv(titles_path) + issues = load_csv(issues_path) + violations: list[Violation] = [] + + # Index titles by id + title_by_id: dict[str, dict[str, str]] = {} + for t in titles: + tid = t.get("id", "").strip() + if tid: + title_by_id[tid] = t + + # Build count of issues per title_id + issues_per_title: dict[str, int] = {} + for issue in issues: + tid = issue.get("title_id", "").strip() + issues_per_title[tid] = issues_per_title.get(tid, 0) + 1 + + # --- Rule 1 & 2: referential integrity + issue_count --- + for issue in issues: + iid = issue.get("id", "").strip() + tid = issue.get("title_id", "").strip() + if tid not in title_by_id: + violations.append(Violation( + "rule1-referential-integrity", + f"issue {iid!r}: title_id {tid!r} has no matching title.id", + )) + + for t in titles: + tid = t.get("id", "").strip() + try: + declared_count = _parse_int(t.get("issue_count", "")) + except ValueError: + violations.append(Violation( + "rule2-issue-count", + f"title {tid!r}: issue_count is not an integer", + )) + continue + if declared_count is None: + continue + actual_count = issues_per_title.get(tid, 0) + if declared_count != actual_count: + 
violations.append(Violation( + "rule2-issue-count", + f"title {tid!r}: issue_count={declared_count} but {actual_count} linked issue row(s) in file", + )) + + # --- Rules 3 & 4: title date/year ordering --- + for t in titles: + tid = t.get("id", "").strip() + try: + start_date = _parse_date(t.get("start_date", "")) + end_date = _parse_date(t.get("end_date", "")) + except ValueError as exc: + violations.append(Violation("rule3-date-order", f"title {tid!r}: unparseable date — {exc}")) + continue + + if start_date and end_date and start_date > end_date: + violations.append(Violation( + "rule3-date-order", + f"title {tid!r}: start_date {start_date} is after end_date {end_date}", + )) + + try: + start_year = _parse_int(t.get("start_year", "")) + end_year = _parse_int(t.get("end_year", "")) + except ValueError as exc: + violations.append(Violation("rule4-year-order", f"title {tid!r}: unparseable year — {exc}")) + continue + + if start_year is not None and end_year is not None and start_year > end_year: + violations.append(Violation( + "rule4-year-order", + f"title {tid!r}: start_year {start_year} > end_year {end_year}", + )) + + # --- Rule 5: issue date within title date range --- + for issue in issues: + iid = issue.get("id", "").strip() + tid = issue.get("title_id", "").strip() + title = title_by_id.get(tid) + if not title: + continue # already reported by rule 1 + + try: + issue_date = _parse_date(issue.get("date", "")) + start_date = _parse_date(title.get("start_date", "")) + end_date = _parse_date(title.get("end_date", "")) + except ValueError as exc: + violations.append(Violation("rule5-issue-date-in-range", f"issue {iid!r}: unparseable date — {exc}")) + continue + + if issue_date and start_date and issue_date < start_date: + violations.append(Violation( + "rule5-issue-date-in-range", + f"issue {iid!r}: date {issue_date} is before title start_date {start_date}", + )) + if issue_date and end_date and issue_date > end_date: + violations.append(Violation( + 
"rule5-issue-date-in-range", + f"issue {iid!r}: date {issue_date} is after title end_date {end_date}", + )) + + # --- Rule 6: positive page count --- + for issue in issues: + iid = issue.get("id", "").strip() + try: + pages = _parse_int(issue.get("pages", "")) + except ValueError: + violations.append(Violation("rule6-pages-positive", f"issue {iid!r}: pages is not an integer")) + continue + if pages is not None and pages <= 0: + violations.append(Violation( + "rule6-pages-positive", + f"issue {iid!r}: pages={pages} must be > 0", + )) + + return violations + + +def main() -> int: + parser = argparse.ArgumentParser(description="Validate golden sample CSVs against business rules.") + parser.add_argument("titles_csv", type=Path, help="Path to valid-periodical-titles-sample.csv") + parser.add_argument("issues_csv", type=Path, help="Path to valid-periodical-issues-sample.csv") + args = parser.parse_args() + + violations = validate(args.titles_csv, args.issues_csv) + if violations: + print(f"FAIL — {len(violations)} violation(s):") + for v in violations: + print(f" {v}") + return 1 + + print("OK — all business rules satisfied.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())