Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions ipfn/ipfn.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,21 +182,13 @@ def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):
print(df)
print(df.groupby('age')['total'].sum(), xip)"""

steps = len(aggregates)
tables = [df]
for inc in range(steps - 1):
tables.append(df.copy())
original = df.copy()

# Calculate the new weights for each dimension
inc = 0

table_current = df.copy()

for features in dimensions:
if inc == (steps - 1):
table_update = df
table_current = tables[inc].copy()
else:
table_update = tables[inc + 1]
table_current = tables[inc]
table_update = table_current.copy()

tmp = table_current.groupby(features)[weight_col].sum()
xijk = aggregates[inc]
Expand Down Expand Up @@ -234,8 +226,10 @@ def ipfn_df(self, df, aggregates, dimensions, weight_col='total'):

table_update.reset_index(inplace=True)
table_current.reset_index(inplace=True)

table_current = table_update

inc += 1
feat_l = []

# Calculate the max convergence rate
max_conv = 0
Expand Down
34 changes: 34 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,37 @@ def test_pandas_3D(self):
for feature in features:
assert round(df.groupby(vertical)['total'].sum().loc[feature], 2) == round(marginal.loc[feature], 2)
m_inc += 1

def test_pandas_3D_with_zeros(self):
    """Fit a 3-D pandas seed containing zero cells and compare to a known table.

    The seed and the reference table share the same A/B/C category layout
    and the same zero cells. One- and two-dimensional aggregates are
    computed from the reference table and handed to ``ipfn``; the fitted
    ``total`` of every cell is then compared against the reference value.
    """
    categories = {
        "A": [3, 3, 3, 3, 4, 4, 4, 4, 1, 1, 1, 1, 2, 2, 2, 2],
        "B": [1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2],
        "C": [1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1],
    }
    seed_total = [
        0, 0, 0, 3.,
        5., 0, 6., 2.,
        2., 1., 0, 2.,
        5., 0, 0, 0,
    ]

    true_total = [
        0, 0, 0, 3.,
        6., 0, 3., 3.,
        2., 10., 0, 7.,
        9., 0, 0, 0,
    ]

    # ``assign`` appends the weight column after A/B/C, the same layout the
    # dict-union form produced, without requiring Python 3.9+.
    df_seed = pd.DataFrame(categories).assign(total=seed_total)
    df_true = pd.DataFrame(categories).assign(total=true_total)

    dimensions = [['A'], ['B'], ['C'], ['A', 'B'], ['A', 'C'], ['B', 'C']]
    aggregates = [df_true.groupby(dimension).sum().total for dimension in dimensions]

    IPF = ipfn.ipfn(df_seed, aggregates, dimensions, convergence_rate=1e-8, rate_tolerance=1e-9)

    df_fitted = IPF.iteration()
    df_verify = df_fitted.merge(df_true, on=list(categories.keys()))

    # The merge is an inner join: a cell missing from the fitted frame would
    # simply vanish and the comparison below would pass vacuously, so make
    # sure every reference cell is still present.
    assert len(df_verify) == len(df_true)

    # Every fitted cell total should match the reference table within tolerance
    assert ((df_verify.total_x - df_verify.total_y).abs() < 1e-7).all()