Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions khard/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def create_parsers() -> tuple[argparse.ArgumentParser,
new_addressbook_parser.add_argument(
"-a", "--addressbook", default=[],
type=lambda x: [y.strip() for y in x.split(",")],
help="Specify address book in which to create the new contact")
help="Specify address book in which to create the new contact(s)")
copy_move_addressbook_parser = argparse.ArgumentParser(add_help=False)
copy_move_addressbook_parser.add_argument(
"-a", "--addressbook", default=[],
Expand Down Expand Up @@ -207,7 +207,16 @@ def create_parsers() -> tuple[argparse.ArgumentParser,
"-o", "--output-file", default=sys.stdout,
type=argparse.FileType("w"),
help="Specify output template file name or use stdout by default")
subparsers.add_parser("template", help="print an empty yaml template")
template_parser = subparsers.add_parser(
"template",
description="print an empty yaml (default) or CSV template",
help="print an empty yaml (default) or CSV template")
template_parser.add_argument(
"-O", "--format", choices=("yaml", "csv"), default="yaml",
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am confused by the -O, I would have expected -f as a short option. What is the reason?

help="select the template format")
template_parser.add_argument(
"-d", "--delimiter", default=",",
help="Use DELIMITER instead of \",\" for CSV field delimiter")
birthdays_parser = subparsers.add_parser(
"birthdays",
aliases=Actions.get_aliases("birthdays"),
Expand Down Expand Up @@ -259,11 +268,17 @@ def create_parsers() -> tuple[argparse.ArgumentParser,
"new",
aliases=Actions.get_aliases("new"),
parents=[new_addressbook_parser, template_input_file_parser],
description="create a new contact",
help="create a new contact")
description="create a new contact or new contacts",
help="create a new contact or new contacts")
new_parser.add_argument(
"--vcard-version", choices=("3.0", "4.0"), dest='preferred_version',
help="Select preferred vcard version for new contact")
new_parser.add_argument(
"-O", "--format", choices=("yaml", "csv"), default="yaml",
help="Select input format (yaml by default)")
new_parser.add_argument(
"-d", "--delimiter", default=",",
help="Use DELIMITER instead of \",\" for CSV field delimiter")
add_email_parser = subparsers.add_parser(
"add-email",
aliases=Actions.get_aliases("add-email"),
Expand Down
54 changes: 44 additions & 10 deletions khard/contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,10 +1001,11 @@ def _filter_invalid_tags(contents: str) -> str:
flags=re.IGNORECASE)
return contents

@staticmethod
def _parse_yaml(input: str) -> dict:
"""Parse a YAML document into a dictionary and validate the data to
some degree.
@classmethod
def _parse_yaml(cls, input: str) -> dict:
"""Parse a YAML document into a dictionary.

And validate to some degree.

:param input: the YAML document to parse
:returns: the parsed data structure
Expand All @@ -1020,8 +1021,19 @@ def _parse_yaml(input: str) -> dict:
if not contact_data:
raise ValueError("Found no contact information")

# check for available data
# at least enter name or organisation
return cls._validate(contact_data)

@staticmethod
def _validate(contact_data: dict) -> dict:
"""Validate contact data to some degree.

Ensure that at a name or organisation has been entered.

:param contact_data: dict of contact data, as returned by
YAMLEditable._parse_yaml()
:returns: the same value
:raises: ValueError
"""
if not (contact_data.get("First name") or contact_data.get("Last name")
or contact_data.get("Organisation")):
raise ValueError("You must either enter a name or an organisation")
Expand Down Expand Up @@ -1088,12 +1100,18 @@ def _set_date(self, target: str, key: str, data: dict) -> None:
"Use format yyyy-mm-dd or "
"yyyy-mm-ddTHH:MM:SS")

def update(self, input: str) -> None:
"""Update this vcard with some yaml input
def update(self, input: str | dict) -> None:
"""Update this vcard with yaml input or with a dict of contact data.

:param input: a yaml string to parse and then use to update self
:param input: a yaml string to parse and then use to update self, or a
dict of the same structure as the dict returned by
self._parse_yaml()
"""
contact_data = self._parse_yaml(input)
if isinstance(input, str):
contact_data = self._parse_yaml(input)
elif isinstance(input, dict):
contact_data = self._validate(input)

# update rev
self._update_revision()

Expand Down Expand Up @@ -1458,6 +1476,22 @@ def from_yaml(cls, address_book: "address_book.VdirAddressBook", yaml: str,
contact.update(yaml)
return contact

@classmethod
def from_dict(cls, address_book: "address_book.VdirAddressBook",
data: dict,
supported_private_objects: list[str] | None = None,
version: str | None = None, localize_dates: bool = False
) -> "Contact":
"""Use this if you want to create a new contact from a dict.

The dict must have the same structure as the dict returned by
cls._parse_yaml().
"""
contact = cls.new(address_book, supported_private_objects, version,
localize_dates=localize_dates)
contact.update(data)
return contact

@classmethod
def clone_with_yaml_update(cls, contact: "Contact", yaml: str,
localize_dates: bool = False
Expand Down
209 changes: 209 additions & 0 deletions khard/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
from collections.abc import Iterator
import csv
import re
from typing import Any


class Parser:
"""An iterator over rows in a CSV file that returns contact data."""
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not see any contact data related code in here. I would say this parser can parse csv with nested fields.


def __init__(self, input_from_stdin_or_file: str, delimiter: str) -> None:
"""Parse first row to determine structure of contact data.
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-contact data
+nested field structure


:param input_from_stdin_or_file: A string from stdin, from an input
file specified with "-i" or "--input-file", or otherwise from a
temporary file created by khard and edited by the user.
:param delimiter: The field delimiter ("," by default).
"""
self.reader = csv.reader(input_from_stdin_or_file.split("\n"),
delimiter=delimiter)
first_row = next(self.reader)
self.template, self.columns = self._parse_headers(first_row)

def __iter__(self) -> Iterator[dict]:
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we constrain the dicts in the type signatures more? I think all keys will be strings, and the values might be a union of string, list and dict. When you have to repeat this complex type a lot, you can add an alias at the top of this file or to helpers/typing.py. Compare the aliases there.

return self

def __next__(self) -> dict:
    """Parse and return the next row supplied by the CSV reader.

    Iteration ends either when "self.reader" is exhausted (its
    "StopIteration" propagates unchanged) or as soon as a blank row is
    encountered.

    :returns: The result of "self.parse()" for the row, i.e. a dict with
        the same structure as the dict returned by
        khard.YAMLEditable._parse_yaml(). Can be passed to
        khard.YAMLEditable.update().
    :raises: StopIteration
    """
    # An exhausted reader raises StopIteration here; let it bubble up.
    current_row = next(self.reader)
    if current_row:
        return self.parse(current_row)
    # A blank row is treated as the end of the data.
    raise StopIteration

def parse(self, row: list[str]) -> dict:
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be "private". Currently only the iterator interface is used from outside.

"""Get data from a CSV row that can be used to make a new Contact.

:param row: A list of strings, one for each column.
:returns: A dict with the same structure as the dict returned by
khard.YAMLEditable._parse_yaml(). Can be passed to
khard.YAMLEditable.update().
"""
self._get_data(row)
return self._process_data()

@staticmethod
def _parse_headers(first_row: list[str]) -> tuple[dict, list]:
"""Determine the data structure of each contact by parsing first row.

Valid headers have the form "<key>[ <idx>[ - <subkey>]]".

If the column header has the form "<key>", each value in the column is
a string indexed by "<key>". If the column header has the form "<key>
<idx>", each value in the column is a string, at index "<idx - 1>", in
a list indexed by "<key>". If the column header has the form "<key>
<idx> - <subkey>", each value in the column is a value in a dict
indexed by "<subkey>". This dict is in a list indexed by "key", at
index "<idx - 1>".

For example, the following CSV would have the following raw data
structure:

First name,Last name,Organisation 1,Organisation 2,Email 1 -
type,Email 1 - value,Email 2 - type

Bruce,Wayne,Justice League,Wayne
Enterprises,work,thebat@justice.org,work,bruce@wayne.com

{'First name': 'Bruce',
'Last name': 'Wayne',
'Organisation': {1: 'Justice League', 2: 'Wayne Enterprises'},
'Email': {1: {'type': 'work'}, 2: {'value': 'thebat@justice.org'}}}

Note that, rather than actual lists, we use dicts with numeric keys.
This is to avoid making assumptions about how users will structure
their CSV files. For example, if a user for some reason placed "Email
2" before "Email 1", and we were storing email data in a list, that
would lead to an IndexError. A dict, on the other hand, does not care
if key "1" does not yet exist when mapping a value to key "2".

:param first_row: First row of the CSV file, which must contain column
headers.
:returns: The "template" dict and the "columns" list. The structure of
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am losing track as soon as I try to understand what this should look like and what it should do. Maybe a very simple example would be good, or a simple test just for this function. The example in the doc comment above is not for the return value of this function, or is it?

"template" is determined by the CSV column headers, and all of its
keys are initialized. "columns" is a list of 2-tuples. The first
item in each tuple is the data structure in which each value in
that column belongs. The second item is the index in that data
structure at which the value is located.
"""
template: dict[str, Any] = {}
columns: list[tuple[dict, Any]] = []

headers = re.compile(r"^([a-zA-Z ]+)(?: (\d+))?(?: - ([a-zA-Z ]+))?$")
for val in first_row:
match = headers.search(val)
if not match:
raise ValueError(f"Column header \"{val}\" is invalid.")
else:
key, idx, subkey = match.groups()

if idx:
idx = int(idx) - 1
template.setdefault(key, {})
if subkey:
template[key].setdefault(idx, {})
template[key][idx].update({subkey: None})
columns.append(
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put these on one line? The line breaks seem unnecessary.

(template[key][idx], subkey)
)
else:
template[key].setdefault(idx, None)
columns.append(
(template[key], idx)
)
else:
template[key] = None
columns.append(
(template, key)
)

return template, columns

def _get_data(self, row: list[str]) -> None:
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me that this function stores its "output" on self via the local variable data_structure. The comment also tries to explain this somehow.

It seems to me that this data is only read by _process_data, and that the functions _get_data and _process_data are only called right after each other in parse(). Can this function be refactored to return the data it would otherwise store on self.template, so that the instance variable can be removed?

"""Populate "self.template" with data using info in "self.columns".

We have to fill in "self.template" in place, rather than a copy,
because the data structures referenced in "self.columns" point to
"self.template" itself. This approach is safe because every value in
"self.template" is overwritten on every iteration.
"""
for i in range(0, len(row)):
data_structure, idx = self.columns[i]
data_structure[idx] = row[i]

def _process_data(self) -> dict:
    """Process the raw data in "self.template" into contact data.

    "self.template" is only read, never modified, and the returned dict
    shares no mutable containers with it. This matters because the nested
    dicts of "self.template" are overwritten in place for every CSV row;
    returning references to them would silently change the data of rows
    that were already returned.

    Each top-level value of "self.template" is converted as follows:

    * A value that is not a dict is copied as is.

    * A dict with numeric keys that index plain values becomes a list of
      the non-empty values, ordered by key. For example, `'Organisation':
      {0: 'Justice League', 1: 'Wayne Enterprises'}` becomes
      `'Organisation': ['Justice League', 'Wayne Enterprises']`.

    * A dict with numeric keys that index dicts becomes a dict keyed by
      the "type" of each indexed dict. Every key maps to a list, so that
      several entries of the same type are all kept. Entries whose "type"
      is empty are skipped.

      If the indexed dicts have exactly the keys "type" and "value", the
      list holds the values. For example, `'Email': {0: {'type': 'work',
      'value': 'thebat@justice.org'}, 1: {'type': 'work', 'value':
      'bruce@wayne.com'}}` becomes `'Email': {'work':
      ['thebat@justice.org', 'bruce@wayne.com']}`.

      Otherwise the list holds a copy of each indexed dict without its
      "type" key. For example, `'Address': {0: {'type': 'home', 'Street':
      '1007 Mountain Drive', 'City': 'Gotham City'}}` becomes `'Address':
      {'home': [{'Street': '1007 Mountain Drive', 'City': 'Gotham
      City'}]}`.

    The shape of a numerically indexed dict is decided by looking at the
    entry with index 0 or, if there is none, at the entry with the lowest
    index.

    :returns: A dict with the same structure as the dict returned by
        khard.YAMLEditable._parse_yaml(). Can be passed to
        khard.YAMLEditable.update().
    :raises: KeyError if an indexed dict has no "type" key (or no "value"
        key where one is expected)
    """
    contact_data: dict = {}
    for key, val in self.template.items():
        if not isinstance(val, dict):
            contact_data[key] = val
            continue

        # Do not assume that index 0 exists: the CSV may start numbering
        # its columns at a higher index.
        sample = val[0] if 0 in val else val[min(val)]
        if not isinstance(sample, dict):
            contact_data[key] = [val[k] for k in sorted(val) if val[k]]
            continue

        values_only = sorted(sample) == ["type", "value"]
        grouped: dict = {}
        for entry in val.values():
            entry_type = entry["type"]
            if not entry_type:
                continue
            if values_only:
                item = entry["value"]
            else:
                # Copy instead of deleting "type" from the shared template
                # dict, which is reused for the next row.
                item = {k: v for k, v in entry.items() if k != "type"}
            grouped.setdefault(entry_type, []).append(item)
        contact_data[key] = grouped
    return contact_data
1 change: 1 addition & 0 deletions khard/data/template.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Formatted name,Kind,Prefix,First name,Additional 1,Additional 2,Last name,Suffix,Nickname,Anniversary,Birthday,Organisation 1,Organisation 2,Title,Role,Phone 1 - type,Phone 1 - value,Phone 2 - type,Phone 2 - value,Email 1 - type,Email 1 - value,Email 2 - type,Email 2 - value,Address 1 - type,Address 1 - Box,Address 1 - Extended,Address 1 - Street,Address 1 - Code,Address 1 - City,Address 1 - Region,Address 1 - Country,Categories 1,Categories 2,Webpage,Note{}
15 changes: 15 additions & 0 deletions khard/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,18 @@ def get_new_contact_template(
template = pathlib.Path(__file__).parent.parent / 'data' / 'template.yaml'
with template.open() as temp:
return temp.read().format('\n'.join(formatted_private_objects))


def get_csv_template(
delimiter: str,
supported_private_objects: list[str] | None = []) -> str:
formatted_private_objects = []
if supported_private_objects:
formatted_private_objects.append("")
for i in range(0, len(supported_private_objects)):
formatted_private_objects.append(f"Private {i + 1} - type")
formatted_private_objects.append(f"Private {i + 1} - value")
path = pathlib.Path(__file__).parent.parent / 'data' / 'template.csv'
with path.open() as temp:
template = temp.read().replace(",", delimiter)
return template.format(delimiter.join(formatted_private_objects))
Loading