Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions copilot-worktree-2026-01-14T11-31-32.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"folders": [
{
"path": "."
}
],
"settings": {
"jdk.telemetry.enabled": true,
"redhat.telemetry.enabled": true
}
}
142 changes: 74 additions & 68 deletions examples/advanced_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@

def example_1_basic_workflow():
"""Example 1: Basic analysis workflow."""
print("="*70)
print("=" * 70)
print("Example 1: Basic Analysis Workflow")
print("="*70)
print("=" * 70)

# Load data
loader = CodeFrequencyLoader()
data = loader.load()

print(f"\n📊 Loaded {len(data)} records")
print(f"Date range: {data['DateTime'].min().date()} to {data['DateTime'].max().date()}")

print(
f"Date range: {data['DateTime'].min().date()} to {data['DateTime'].max().date()}"
)

# Get summary
summary = loader.get_summary()
print(f"\n📈 Summary:")
Expand All @@ -39,47 +41,49 @@ def example_1_basic_workflow():

def example_2_sprint_analysis():
    """Example 2: Detect coding sprints and print details for the top five.

    Uses a 3-week rolling window and a 1.5x-of-baseline threshold to flag
    sustained bursts of activity, then reports duration, date range, total
    line churn, and average weekly churn for each detected sprint.
    """
    print("\n" + "=" * 70)
    print("Example 2: Sprint Detection and Analysis")
    print("=" * 70)

    loader = CodeFrequencyLoader()
    analyzer = CodeFrequencyAnalyzer(loader)

    # Detect sprints with custom parameters
    sprints = analyzer.detect_sprints(window_weeks=3, threshold_multiplier=1.5)

    print(f"\n🚀 Detected {len(sprints)} coding sprints:\n")

    for i, sprint in enumerate(sprints[:5], 1):  # Show top 5
        print(f"Sprint {i}:")
        print(f"  Duration: {sprint['duration_weeks']} weeks")
        print(f"  Period: {sprint['start_date'].date()} to {sprint['end_date'].date()}")
        print(
            f"  Total changes: {sprint['total_additions'] + sprint['total_deletions']:,}"
        )
        print(f"  Avg weekly churn: {sprint['avg_weekly_churn']:,.0f}")
        print()


def example_3_time_analysis():
"""Example 3: Time-based analysis."""
print("="*70)
print("=" * 70)
print("Example 3: Time-Based Analysis")
print("="*70)
print("=" * 70)

loader = CodeFrequencyLoader()
analyzer = CodeFrequencyAnalyzer(loader)

# Yearly statistics
yearly = analyzer.get_yearly_stats()
print("\n📅 Top 3 Most Productive Years:")
top_years = yearly.nlargest(3, 'Additions_sum')
top_years = yearly.nlargest(3, "Additions_sum")

for year, row in top_years.iterrows():
print(f"\n{year}:")
print(f" Additions: {int(row['Additions_sum']):,}")
print(f" Deletions: {int(abs(row['Deletions_sum'])):,}")
print(f" Net: {int(row['net_changes']):,}")

# Activity patterns
print(f"\n📊 Activity Statistics:")
print(f" Activity ratio: {analyzer.calculate_activity_ratio():.2%}")
Expand All @@ -90,74 +94,76 @@ def example_3_time_analysis():

def example_4_custom_visualization():
    """Example 4: Build and save a custom two-panel matplotlib figure.

    Top panel: weekly additions with a 30-week centered moving average.
    Bottom panel: net line changes aggregated by year. The figure is written
    to ``output/visualizations/custom_analysis.png`` (directories are created
    as needed) and the figure is closed to free resources.
    """
    print("\n" + "=" * 70)
    print("Example 4: Custom Visualization")
    print("=" * 70)

    loader = CodeFrequencyLoader()
    loader.load()
    data = loader.data.copy()  # copy so derived columns don't mutate loader.data

    # Derived columns for the custom analysis
    data["NetChanges"] = data["Additions"] + data["Deletions"]
    data["Year"] = data["DateTime"].dt.year

    # Centered 30-week moving average smooths out weekly noise
    data["MA_30"] = data["Additions"].rolling(window=30, center=True).mean()

    # Two stacked panels; the Figure handle itself is not needed afterwards
    _, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8))

    # Plot 1: Additions with moving average
    ax1.plot(data["DateTime"], data["Additions"], alpha=0.3, label="Additions")
    ax1.plot(data["DateTime"], data["MA_30"], "r-", linewidth=2, label="30-week MA")
    ax1.set_ylabel("Lines Added")
    ax1.set_title("Code Additions with 30-Week Moving Average")
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Plot 2: Net changes by year
    yearly_net = data.groupby("Year")["NetChanges"].sum()
    ax2.bar(yearly_net.index, yearly_net.values, color="steelblue", alpha=0.7)
    ax2.axhline(y=0, color="black", linestyle="-", linewidth=0.5)  # zero baseline
    ax2.set_xlabel("Year")
    ax2.set_ylabel("Net Changes")
    ax2.set_title("Net Code Changes by Year")
    ax2.grid(True, alpha=0.3, axis="y")

    plt.tight_layout()

    # Save plot, creating the output directory tree if it does not exist yet
    output_path = Path("output/visualizations/custom_analysis.png")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    print(f"\n✅ Custom visualization saved to {output_path}")
    plt.close()


def example_5_filtering_analysis():
"""Example 5: Filtering and conditional analysis."""
print("\n" + "="*70)
print("\n" + "=" * 70)
print("Example 5: Filtering and Conditional Analysis")
print("="*70)
print("=" * 70)

loader = CodeFrequencyLoader()
loader.load()
data = loader.data.copy()

# Analyze only high-activity periods
data['AbsChanges'] = data['Additions'] + abs(data['Deletions'])
high_activity = data[data['AbsChanges'] > data['AbsChanges'].quantile(0.75)]
data["AbsChanges"] = data["Additions"] + abs(data["Deletions"])
high_activity = data[data["AbsChanges"] > data["AbsChanges"].quantile(0.75)]

print(f"\n📊 High Activity Periods (top 25%):")
print(f" Total records: {len(high_activity)}")
print(f" Date range: {high_activity['DateTime'].min().date()} to {high_activity['DateTime'].max().date()}")
print(
f" Date range: {high_activity['DateTime'].min().date()} to {high_activity['DateTime'].max().date()}"
)
print(f" Total changes: {high_activity['AbsChanges'].sum():,.0f}")

# Analyze recent activity (last 2 years)
recent_date = data['DateTime'].max() - pd.Timedelta(days=730)
recent_data = data[data['DateTime'] >= recent_date]
recent_date = data["DateTime"].max() - pd.Timedelta(days=730)
recent_data = data[data["DateTime"] >= recent_date]

print(f"\n📅 Recent Activity (last 2 years):")
print(f" Records: {len(recent_data)}")
print(f" Total additions: {recent_data['Additions'].sum():,}")
Expand All @@ -167,16 +173,16 @@ def example_5_filtering_analysis():

def example_6_comparison_analysis():
"""Example 6: Comparative analysis."""
print("\n" + "="*70)
print("\n" + "=" * 70)
print("Example 6: Comparative Analysis")
print("="*70)
print("=" * 70)

loader = CodeFrequencyLoader()
analyzer = CodeFrequencyAnalyzer(loader)

# Compare productivity across time periods
trends = analyzer.get_productivity_trends(periods=4)

print("\n📊 Productivity Trends (4 periods):")
for period, row in trends.iterrows():
print(f"\nPeriod {period + 1}:")
Expand All @@ -193,19 +199,19 @@ def main():
print("║ 📚 RunTime Advanced Examples 📚 ║")
print("║ ║")
print("╚═══════════════════════════════════════════════════════════════╝\n")

try:
example_1_basic_workflow()
example_2_sprint_analysis()
example_3_time_analysis()
example_4_custom_visualization()
example_5_filtering_analysis()
example_6_comparison_analysis()
print("\n" + "="*70)

print("\n" + "=" * 70)
print("✅ All examples completed successfully!")
print("="*70)
print("=" * 70)

except Exception as e:
print(f"\n❌ Error: {e}")
raise
Expand Down
81 changes: 10 additions & 71 deletions notebooks/exploration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -610,80 +610,19 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 1,
"id": "7be2de20",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 8 coding sprints:\n",
"\n",
"Sprint 1:\n",
" start_date: 2018-03-11 00:00:00\n",
" end_date: 2018-04-22 00:00:00\n",
" duration_weeks: 7\n",
" total_additions: 817757\n",
" total_deletions: 219662\n",
" avg_weekly_churn: 148202.7142857143\n",
"\n",
"Sprint 2:\n",
" start_date: 2018-05-20 00:00:00\n",
" end_date: 2018-06-10 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 135273\n",
" total_deletions: 527\n",
" avg_weekly_churn: 33950.0\n",
"\n",
"Sprint 3:\n",
" start_date: 2018-07-22 00:00:00\n",
" end_date: 2018-08-12 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 138173\n",
" total_deletions: 110270\n",
" avg_weekly_churn: 62110.75\n",
"\n",
"Sprint 4:\n",
" start_date: 2019-06-02 00:00:00\n",
" end_date: 2019-06-23 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 62\n",
" total_deletions: 219657\n",
" avg_weekly_churn: 54929.75\n",
"\n",
"Sprint 5:\n",
" start_date: 2019-11-03 00:00:00\n",
" end_date: 2019-11-24 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 44861\n",
" total_deletions: 767311\n",
" avg_weekly_churn: 203043.0\n",
"\n",
"Sprint 6:\n",
" start_date: 2021-05-23 00:00:00\n",
" end_date: 2021-06-13 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 88404\n",
" total_deletions: 77108\n",
" avg_weekly_churn: 41378.0\n",
"\n",
"Sprint 7:\n",
" start_date: 2021-11-28 00:00:00\n",
" end_date: 2021-11-28 00:00:00\n",
" duration_weeks: 1\n",
" total_additions: 0\n",
" total_deletions: 0\n",
" avg_weekly_churn: 0.0\n",
"\n",
"Sprint 8:\n",
" start_date: 2022-02-13 00:00:00\n",
" end_date: 2022-03-06 00:00:00\n",
" duration_weeks: 4\n",
" total_additions: 9613\n",
" total_deletions: 54794\n",
" avg_weekly_churn: 16101.75\n",
"\n"
"ename": "NameError",
"evalue": "name 'analyzer' is not defined",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m sprints = \u001b[43manalyzer\u001b[49m.detect_sprints()\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFound \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(sprints)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m coding sprints:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i, sprint \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(sprints, \u001b[32m1\u001b[39m):\n",
"\u001b[31mNameError\u001b[39m: name 'analyzer' is not defined"
]
}
],
Expand Down Expand Up @@ -816,7 +755,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down
Binary file modified src/__pycache__/load_data.cpython-312.pyc
Binary file not shown.
Loading