-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.env
More file actions
15 lines (11 loc) · 1.86 KB
/
example.env
File metadata and controls
15 lines (11 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Example environment configuration. Copy this file to `.env` and fill in the blank values.
# Comments are kept on their own lines and assignments are written as KEY=value (no spaces
# around `=`): several env-file loaders do not strip trailing `# ...` text or tolerate
# padded `=`, and would otherwise read the comment as part of the value.

# Replace with your GitHub Personal Access Token (PAT).
GITHUB_TOKEN=

# Name of the Google Cloud Storage bucket where the datasets will be stored.
GCS_BUCKET_NAME=predict-pr-merge-datasets

# Prefix for the path in the GCS bucket where the datasets will be stored.
# The full path will be constructed as {GCS_PATH_PREFIX}/{owner}/{repo}/{ingestion_id}
GCS_PATH_PREFIX=etl_pipeline

# Path to the Google Cloud service account credentials JSON file.
# This is required for authentication when uploading datasets to GCS.
GOOGLE_APPLICATION_CREDENTIALS=

# Number of pull requests to fetch per page when making API calls to GitHub.
# Adjust this value based on your needs and GitHub API rate limits.
PRS_PER_PAGE=20

# Maximum number of worker threads to use for fetching pull request data from GitHub.
# Adjust this value based on your system's capabilities and the expected load.
MAX_PR_FETCH_WORKERS=5

# Maximum number of retries for the PR enricher in case of failures.
# This helps to ensure that transient issues do not cause the enrichment process to fail permanently.
ENRICHER_MAX_RETRIES=5

# Maximum number of worker threads to use for enriching pull request data.
# Adjust this value based on your system's capabilities and the expected load.
MAX_PR_ENRICHER_WORKERS=5

# Timeout in seconds for fetching pull request data from GitHub.
# Adjust this value based on your network conditions and the expected response times from the GitHub API.
PR_REQ_FETCH_TIMEOUT=60

# Maximum number of pages to fetch when extracting pull request data from GitHub.
# This limits the total number of pull requests that will be processed in a single run.
# Adjust this value based on your needs and GitHub API rate limits.
MAX_EXTRACTION_PAGES=5

# Batch size for processing pull request data during the ETL (Extract, Transform, Load) process.
# Adjust this value based on your system's capabilities and the expected load.
ETL_BATCH_SIZE=20