diff --git a/.env.example b/.env.example index 7b5722f..eebccc7 100644 --- a/.env.example +++ b/.env.example @@ -2,7 +2,8 @@ SECRET_KEY=change-me DEBUG=True -ALLOWED_HOSTS= +ALLOWED_HOSTS +HS_CODE_SEARCH_THRESHOLD=0.1 # db DATABASE_URL=postgres://hsuser:hspass@localhost:5432/hsdb diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..b79bcb9 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,55 @@ +name: Deploy Production + +on: +workflow_run: +workflows: ["CI"] +branches: [main] +types: +- completed + +jobs: +deploy: +if: ${{ github.event.workflow_run.conclusion == 'success' }} + +runs-on: ubuntu-latest + +steps: + - name: Deploy to VPS + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.VPS_HOST }} + username: ${{ secrets.VPS_USER }} + key: ${{ secrets.VPS_SSH_KEY }} + port: ${{ secrets.VPS_PORT }} + + script: | + set -e + + cd /Hs_codes_api + + echo "Pulling latest images..." + docker compose pull + + echo "Starting updated containers..." + docker compose up -d + + echo "Waiting for application..." + sleep 15 + + echo "Running migrations..." + docker compose exec -T web python manage.py migrate --noinput + + echo "Checking health endpoint..." + + for i in $(seq 1 20); do + if curl -fsS http://localhost:8001/api/v1/health/ > /dev/null; then + echo "Health check passed" + exit 0 + fi + + echo "Waiting for healthy container..." 
+ sleep 5 + done + + echo "Health check failed" + exit 1 diff --git a/.gitignore b/.gitignore index 5897e23..5f8f5e1 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,4 @@ dist/ build/ *.egg-info/ -celerybeat-schedule \ No newline at end of file +celerybeat-schedule diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d6810c1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,39 @@ +# Builder Stage +FROM python:3.12-slim AS builder + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +RUN apt-get update && apt-get install -y \ + build-essential \ + libpq-dev + +COPY requirements.txt . + +RUN pip install --upgrade pip +RUN pip install --prefix=/install -r requirements.txt + + +# Runtime Stage +FROM python:3.12-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +RUN groupadd -r appuser && useradd -r -g appuser appuser + +COPY --from=builder /install /usr/local + +COPY . . + +RUN chown -R appuser:appuser /app + +USER appuser + +EXPOSE 8001 + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/app/migrations/0001_initial.py b/app/migrations/0001_initial.py index c0496f1..86c1703 100644 --- a/app/migrations/0001_initial.py +++ b/app/migrations/0001_initial.py @@ -1,9 +1,12 @@ -# Generated by Django 6.0.5 on 2026-05-31 16:13 +# Generated by Django 6.0.5 on 2026-06-01 14:40 import django.contrib.auth.models import django.contrib.auth.validators +import django.contrib.postgres.indexes +import django.db.models.deletion import django.utils.timezone from django.db import migrations, models +from django.contrib.postgres.operations import TrigramExtension class Migration(migrations.Migration): @@ -15,6 +18,38 @@ class Migration(migrations.Migration): ] operations = [ + TrigramExtension(), + + migrations.CreateModel( + name="Category", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=25)), + 
], + ), + migrations.CreateModel( + name="HsCodeFile", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("hs_code_file", models.FileField(upload_to="hs_code")), + ], + ), migrations.CreateModel( name="User", fields=[ @@ -30,9 +65,7 @@ class Migration(migrations.Migration): ("password", models.CharField(max_length=128, verbose_name="password")), ( "last_login", - models.DateTimeField( - blank=True, null=True, verbose_name="last login" - ), + models.DateTimeField(blank=True, null=True, verbose_name="last login"), ), ( "is_superuser", @@ -59,21 +92,15 @@ class Migration(migrations.Migration): ), ( "first_name", - models.CharField( - blank=True, max_length=150, verbose_name="first name" - ), + models.CharField(blank=True, max_length=150, verbose_name="first name"), ), ( "last_name", - models.CharField( - blank=True, max_length=150, verbose_name="last name" - ), + models.CharField(blank=True, max_length=150, verbose_name="last name"), ), ( "email", - models.EmailField( - blank=True, max_length=254, verbose_name="email address" - ), + models.EmailField(blank=True, max_length=254, verbose_name="email address"), ), ( "is_staff", @@ -93,8 +120,14 @@ class Migration(migrations.Migration): ), ( "date_joined", - models.DateTimeField( - default=django.utils.timezone.now, verbose_name="date joined" + models.DateTimeField(default=django.utils.timezone.now, verbose_name="date joined"), + ), + ( + "role", + models.CharField( + choices=[("Admin", "admin"), ("Staff", "staff")], + default="admin", + max_length=5, ), ), ( @@ -129,4 +162,52 @@ class Migration(migrations.Migration): ("objects", django.contrib.auth.models.UserManager()), ], ), - ] + migrations.CreateModel( + name="HsCode", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("hs_code", models.CharField(max_length=20)), + ("description", 
class Migration(migrations.Migration):
    """Drop the ``category`` field from the ``HsCode`` model.

    After this migration ``HsCode`` rows are no longer linked to a
    ``Category``; the ``Category`` model itself is not touched here.
    """

    # Must run after 0001_initial, which creates ``HsCode`` together with
    # the ``category`` foreign key removed below.
    dependencies = [
        ("app", "0001_initial"),
    ]

    operations = [
        # NOTE(review): 0001_initial was regenerated and the earlier
        # 0002-0006 migrations were deleted in the same change -- confirm no
        # deployed database has already applied that older chain.
        migrations.RemoveField(
            model_name="hscode",
            name="category",
        ),
    ]
b/app/migrations/0003_category_alter_user_role_hscode.py deleted file mode 100644 index 4b232aa..0000000 --- a/app/migrations/0003_category_alter_user_role_hscode.py +++ /dev/null @@ -1,76 +0,0 @@ -# Generated by Django 6.0.5 on 2026-05-31 16:56 - -import django.contrib.postgres.indexes -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("app", "0002_user_role"), - ] - - operations = [ - migrations.CreateModel( - name="Category", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("name", models.CharField(max_length=25)), - ], - ), - migrations.AlterField( - model_name="user", - name="role", - field=models.CharField( - choices=[("Admin", "admin"), ("Staff", "staff")], - default="admin", - max_length=5, - ), - ), - migrations.CreateModel( - name="HsCode", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("hs_code", models.CharField(max_length=20)), - ("description", models.TextField()), - ("created_at", models.DateTimeField(auto_now_add=True)), - ( - "category", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, to="app.category" - ), - ), - ], - options={ - "indexes": [ - django.contrib.postgres.indexes.GinIndex( - fields=["hs_code"], - name="hscode_hs_code_trgm_idx", - opclasses=["gin_trgm_ops"], - ), - django.contrib.postgres.indexes.GinIndex( - fields=["description"], - name="hscode_description_trgm_idx", - opclasses=["gin_trgm_ops"], - ), - ], - }, - ), - ] diff --git a/app/migrations/0004_remove_hscode_hscode_hs_code_trgm_idx_and_more.py b/app/migrations/0004_remove_hscode_hscode_hs_code_trgm_idx_and_more.py deleted file mode 100644 index 0a89f1c..0000000 --- a/app/migrations/0004_remove_hscode_hscode_hs_code_trgm_idx_and_more.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by 
Django 6.0.5 on 2026-05-31 17:00 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ("app", "0003_category_alter_user_role_hscode"), - ] - - operations = [ - migrations.RemoveIndex( - model_name="hscode", - name="hscode_hs_code_trgm_idx", - ), - migrations.RemoveIndex( - model_name="hscode", - name="hscode_description_trgm_idx", - ), - ] diff --git a/app/migrations/0005_hscodefile_hscode_hs_code_file.py b/app/migrations/0005_hscodefile_hscode_hs_code_file.py deleted file mode 100644 index 703fd1b..0000000 --- a/app/migrations/0005_hscodefile_hscode_hs_code_file.py +++ /dev/null @@ -1,39 +0,0 @@ -# Generated by Django 6.0.5 on 2026-05-31 17:09 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("app", "0004_remove_hscode_hscode_hs_code_trgm_idx_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="HsCodeFile", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("hs_code_file", models.FileField(upload_to="hs_code")), - ], - ), - migrations.AddField( - model_name="hscode", - name="hs_code_file", - field=models.ForeignKey( - default="", - on_delete=django.db.models.deletion.CASCADE, - to="app.hscodefile", - ), - preserve_default=False, - ), - ] diff --git a/app/migrations/0006_hscode_unique_hs_code.py b/app/migrations/0006_hscode_unique_hs_code.py deleted file mode 100644 index edcaa5e..0000000 --- a/app/migrations/0006_hscode_unique_hs_code.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 6.0.5 on 2026-06-01 13:13 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('app', '0005_hscodefile_hscode_hs_code_file'), - ] - - operations = [ - migrations.AddConstraint( - model_name='hscode', - constraint=models.UniqueConstraint(fields=('hs_code',), 
name='unique_hs_code'), - ), - ] diff --git a/app/models.py b/app/models.py index 83d19df..052e955 100644 --- a/app/models.py +++ b/app/models.py @@ -1,8 +1,8 @@ from django.db import models from django.contrib.auth.models import AbstractUser -# from django.contrib.postgres.indexes import GinIndex -# from django.contrib.postgres.operations import TrigramExtension +from django.contrib.postgres.indexes import GinIndex +from django.contrib.postgres.operations import TrigramExtension class User(AbstractUser): @@ -21,7 +21,6 @@ class HsCodeFile(models.Model): class HsCode(models.Model): hs_code_file = models.ForeignKey(HsCodeFile, on_delete=models.CASCADE) - category = models.ForeignKey(Category, on_delete=models.CASCADE) hs_code = models.CharField(max_length=20) description = models.TextField() created_at = models.DateTimeField(auto_now_add=True) @@ -30,18 +29,15 @@ class Meta: constraints = [ models.UniqueConstraint(fields=["hs_code"], name="unique_hs_code") ] - - -# class Meta: -# indexes = [ -# GinIndex( -# fields=["hs_code"], -# opclasses=["gin_trgm_ops"], -# name="hscode_hs_code_trgm_idx" -# ), -# GinIndex( -# fields=["description"], -# opclasses=["gin_trgm_ops"], -# name="hscode_description_trgm_idx" -# ), -# ] + indexes = [ + GinIndex( + fields=["hs_code"], + opclasses=["gin_trgm_ops"], + name="hscode_hs_code_trgm_idx", + ), + GinIndex( + fields=["description"], + opclasses=["gin_trgm_ops"], + name="hscode_description_trgm_idx", + ), + ] diff --git a/app/services/file_upload_service.py b/app/services/file_upload_service.py index f5fd537..f7fb833 100644 --- a/app/services/file_upload_service.py +++ b/app/services/file_upload_service.py @@ -18,6 +18,8 @@ class UploadResult: def process_hs_code_csv(uploaded_file) -> UploadResult: text = _decode_file(uploaded_file) rows = _parse_csv(text) + + print(text) hs_code_file = HsCodeFile.objects.create(hs_code_file=uploaded_file) objects, skipped_blank = _build_objects(rows, hs_code_file) @@ -45,9 +47,9 @@ def 
_parse_csv(text: str) -> list[dict]: try: reader = csv.DictReader(io.StringIO(text)) - if not {"hs_code", "description"}.issubset(reader.fieldnames or []): + if not {"HS CODE", "GOODS DESCRIPTION"}.issubset(reader.fieldnames or []): raise ValueError( - f"CSV must contain 'hs_code' and 'description' columns. " + f"CSV must contain 'HS CODE' and 'GOODS DESCRIPTION' columns. " f"Found: {reader.fieldnames}" ) @@ -62,33 +64,23 @@ def _parse_csv(text: str) -> list[dict]: def _build_objects(rows, hs_code_file): - from .models import Category - objects = [] skipped_blank = 0 - category_cache: dict[str, Category] = {} for row in rows: - hs_code = row.get("hs_code", "").strip() - description = row.get("description", "").strip() + hs_code = row.get("HS CODE", "").strip() + description = row.get("GOODS DESCRIPTION", "").strip() if not hs_code or not description: skipped_blank += 1 continue - chapter = hs_code[:2] - if chapter not in category_cache: - name = HS_CHAPTER_CATEGORIES.get(chapter, f"Chapter {chapter}") - category, _ = Category.objects.get_or_create(name=name) - category_cache[chapter] = category - objects.append( HsCode( hs_code=hs_code, description=description, - category=category_cache[chapter], hs_code_file=hs_code_file, ) ) - return objects, skipped_blank + return objects, skipped_blank \ No newline at end of file diff --git a/app/urls.py b/app/urls.py index a1eb7d2..f8c75e1 100644 --- a/app/urls.py +++ b/app/urls.py @@ -1,6 +1,14 @@ from django.urls import path -from .views import HsCodeUploadView +from .views import HsCodeUploadView, HsCodeSearchView, HealthCheckView urlpatterns = [ + path( + "hs-codes/", + HsCodeSearchView.as_view(), + name="hs-code-search", + ), path("hs-codes/upload/", HsCodeUploadView.as_view(), name="hscode-upload"), + path("health/", + HealthCheckView.as_view(), + name="health") ] diff --git a/app/views.py b/app/views.py index dba9a93..1f02cb7 100644 --- a/app/views.py +++ b/app/views.py @@ -1,13 +1,22 @@ from dataclasses import asdict 
class HsCodeSearchView(generics.ListAPIView):
    """List HS codes ranked by trigram similarity to the ``q`` parameter.

    The score is ``2 * similarity(description, q) + similarity(hs_code, q)``.
    Rows scoring below ``settings.HS_CODE_SEARCH_THRESHOLD`` (0.1 when the
    setting is absent) are excluded, and results are returned best match
    first.
    """

    serializer_class = HsCodeSerializer

    def get_queryset(self):
        """Build the ranked search queryset for the current request.

        Returns:
            A lazy ``HsCode`` queryset annotated with ``similarity``.

        Raises:
            ValidationError: If ``q`` is missing, empty or only whitespace.
        """
        # Strip first so a whitespace-only value is rejected like a missing
        # one instead of running a search that cannot match anything useful.
        q = (self.request.query_params.get("q") or "").strip()

        if not q:
            logger.warning(
                "Missing search query | path={path}",
                path=self.request.path,
            )
            raise ValidationError({"q": ["This query parameter is required."]})

        threshold = getattr(
            settings,
            "HS_CODE_SEARCH_THRESHOLD",
            0.1,
        )

        logger.info(
            "HS search executed | query={q} | threshold={threshold}",
            q=q,
            threshold=threshold,
        )

        # The queryset is returned unevaluated on purpose: calling
        # ``.count()`` or logging the queryset itself would each run an
        # extra trigram query per request just to produce a log line.
        # No blanket ``except`` either -- nothing here touches the database,
        # and a ValidationError is an expected 400, not a failure that
        # deserves a traceback in the logs.
        return (
            HsCode.objects.annotate(
                similarity=(
                    TrigramSimilarity("description", q) * 2
                    + TrigramSimilarity("hs_code", q)
                )
            )
            .filter(similarity__gte=threshold)
            # ``pk`` breaks ties so equal scores keep a stable order
            # across paginated requests.
            .order_by("-similarity", "pk")
        )
"""Gunicorn settings, loaded through ``gunicorn --config gunicorn.conf.py``."""
import os

from decouple import config

# Listen on every interface, on port 8001.
bind = "0.0.0.0:8001"

# Worker count comes from GUNICORN_WORKERS; decouple casts both the
# environment value and the "3" fallback to int.
workers = config("GUNICORN_WORKERS", default="3", cast=int)

timeout = 120

# Access and error logs share the same "-" target.
accesslog = errorlog = "-"