Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Vim
*.swp

# Repo specific
cbert_model/
aug_data_*/
result/
33 changes: 32 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
# cbert_aug
# Changes to make reproducible

Build docker environment:
```
docker build -f docker/Dockerfile -t cbert_cuda110 .
```

Mount the `code` directory and start an interactive shell in the container:
```
docker run \
--gpus all \
--mount type=bind,source="$(pwd)"/code,target=/cbert/code \
-it cbert_cuda110 /bin/bash
```

Fine-tune BERT on the Subjective/Objective dataset (takes ~20 minutes;
see `cbert_finetune.py` for other options):
```
python3 cbert_finetune.py
```
Model is saved to `cbert_model/`.

Generate augmented data for Subjective/Objective dataset:
```
python3 cbert_augdata.py --num_train_epochs 1
```
The data are output to `aug_data_<SUFFIX>/subj`, where `<SUFFIX>` depends on the
parameters used for the data augmentation.
- `train_origin.tsv`: unaugmented data
- `train.tsv`: unaugmented data + augmented data (appended)

# Original README

Thanks to @liuyaxin for rewriting the code with Hugging Face's latest transformers library.
If you want to reproduce the results in the paper, you can switch to the develop branch.
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
24 changes: 24 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Image for running the cbert scripts on a CUDA 11.0 / cuDNN 8 base (Ubuntu 20.04).
# Build from the repository root so that requirements.txt is inside the build context:
#   docker build -f docker/Dockerfile -t cbert_cuda110 .
FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04

WORKDIR /cbert

COPY requirements.txt requirements.txt

# Disable nvidia repos as their GPG key isn't recognized.
# Either list file may be absent in the base image, so failures of `mv` are
# silenced and the trailing `true` keeps the step from failing the build.
RUN mv -f /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/cuda.list.bak 2>/dev/null; \
    mv -f /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/nvidia-ml.list.bak 2>/dev/null; \
    true

# System packages. DEBIAN_FRONTEND=noninteractive is set only for this command so
# that no package configuration prompt can block the (non-interactive) build.
# The apt package lists are removed in the same layer to keep the image smaller.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --no-install-suggests -y \
        gnupg2 \
        ca-certificates \
        git \
        build-essential \
        python3-dev \
        python3-pip \
        wget \
        curl \
        rsync && \
    rm -rf /var/lib/apt/lists/*

# Install the pinned Python dependencies with the python3 interpreter installed above;
# `python3 -m pip` does not depend on a bare `pip` executable being on PATH, and
# --no-cache-dir keeps pip's download cache out of the image.
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# The project's `code` directory is expected to be bind-mounted here at run time.
WORKDIR /cbert/code
52 changes: 52 additions & 0 deletions docker/nvidia_gpgkey
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----

mQINBFnNWDEBEACiX68rxIWvqH3h2GykO25oK9BAqV8fDtb6lXEbw3eKx4g87BRz
M3DQBA0S0IfkQ72ovJ33H50+gVTXuu+Zme5muWk72m3pApccZVDLqdzYlpWPruNb
MC+IlWr70yo8Jw8Zr1ihbWjFvMbDJTkgqPt2djNq3xxvdiKoZlgnpLRKIpSu9iBQ
lNoZLHxTQKFH4219L77prRogv2QV1ckBL5lDVOERJuHo4jHE8mm9/NZ6v3m2HGuu
AEZ7T9nWlPGiAIP8Pww4ZRTJcBANcI2EFKPLdfP61HTH6w0kVMkoAaGlemadTDl3
ZcLpUpTFLc+ko/2uQ1qVPx9QYyoMrorS3kUmlXrhsA7FvcB09aIcb+JX6SVkcbO5
A5+baCa3owwUtFBXMHM5hqpLv4P3/GsuW6283YwLZCf53dJY4lJZePqzPGsvs/wS
vhnZrFvb61i/Aqm0hjhVh7h6VNxUiE8geMcjxy29LtzajoyS0EPVxes4xZu0VbS7
8LQyCNHSpS7TFmtVUQmbXqDN7cpiyr9+yutr0lZOMc7NYQt0nP/3RtYkWEob6wXa
rVImHas1OYzlZymdO1uAnqkediS61E2vSD1OEq37/375FB/Q3AYXuNkQzDjYoJJz
9wsv7Xp0bdPzQ/daLdIFNQXo5MmVIirsWM07JvbZaJhDOiJxGn0MPf11/QARAQAB
tEBOVklESUEgQ09SUE9SQVRJT04gKE9wZW4gU291cmNlIFByb2plY3RzKSA8Y3Vk
YXRvb2xzQG52aWRpYS5jb20+iQI4BBMBCgAiBQJZzVgxAhsPBgsJCggHAwUVCgkI
CwUWAgMBAAIeAQIXgAAKCRDdyuBE95bssAh6EACgUCww2sr8sOztEHKhvdCsonXu
THYbel3YlWmVDPbh4dA31xoRXlvSJptJzPi/zlTc9fkVSFGbEZbFRR4JjnwYTMLD
ElMh5YRMYAoPVYhWGKIO4earu32GhFuPjfr6h+0xNaQeDPIbr7bPe/AEhLSdJMzI
OuAifr7UaC65A6YlxfeaSqyt0HthYujoQ12cWxP998C5jkc0IN2tyLs/OD7HLHht
+lafqDSylykx63cw7jvsV/15rqZwVwjhkcxZyrKET32MTjXF3cxn7+TGpKS8B1k4
a/EI7uXnncfSoma0dAT9bZM9JZbXQmSzCPDHHuVtnQ/3uh8VyenpigTFnrb20LCy
6WzJd3O9lAZXLhvwF/By3a07WLzRtTZNaUpt37Anb0js2syr3lohbmK9i3xvuqZN
zhGPbqu9IV+vFgSGyTHRJUSBlHKDGiCdOOHc20MLPW1yRCXbx0F4eS9TWchYyJkJ
NNczD5DnEl/gsvL4NCRxa+oUyUhhJ1HpJ6YNmTsy6nAAKIC+6248o164GiavaR3z
03RfaQayGHAUrBKi+PJBY7efgsZeYT8f+hyYrIC04MO8poBKS/GvSUL2QtVtj59N
q+95gIptW2mZM8KRpt2huLH+QQ8SKr1vAECbpKJOwseqKmVyxX02iaSE8ifLE+tX
FE8YgS3CZjWwy5PD0LkBDQRdgpCQAQgAx1oxX9tFlv3CIva0CJ0dsZyNF7mgHPgN
szccUYLu0chyWYvwiVU/OlCzivytNX56wgeBgIVV1QzeBuTkrJSgzJ+dSgfrmyg5
RwIDhvH+Dcut0++6+di1LyH9gXQcYPrN3pf4yR8nlRbm6K0Vsp0Z4+br18QelURe
rfAkRordag26aB+MzVLvloHHu3Z6/v321uTGMdFd8CVCjovec5+EdcIAam3U/MmZ
e2mr2M/x6F3st30cE7umq9Bb6UCqc6L8bQcoloxR3rwFzL1u9wUBUzQlaMNmxbe0
BfezkmSQeC8JN4Fku+DtHEpS9uP5JEYNEEQ66K4mJDTMr0whBv1fKQARAQABiQNb
BBgBCgAmAhsCFiEEyVsyG2HojBgJxPdZ3crgRPeW7LAFAl7oD1gFCQNGskgBKcBd
IAQZAQoABgUCXYKQkAAKCRBu2RyjrBFgzZ/WB/9TuD2qzaBO7HlPDWRUTpFlvFgy
Dc3XyfTAC/ISeYbIcPcq5kmVHgpsMdbN9Vvmot5GuT7VWzhHc9sJCmHgL330glBt
NtSRflKzlBYnbiSWxLFYZtu2BtNOk8Ylbw8qw1E6W/iFBrqAwgeZvs2VOcPU3203
Mqfi1JbS+YHC/bgs6cNq0zs/WJraYxiuleclKYExxLt9tRd0058n58GAph+Ki7mR
InO6kxuKpsQannSn1Ku/DiaQcSF2L2TMSo0N9zwvYEZR+hgsKVqyRKT+DkZhusHJ
HYGv96YHSTwo016ZhwYS9t0MLXY9/PgJysuO41Ya4Ii43D3UK1wOHTmyHZHTCRDd
yuBE95bssDpwD/4jV9Pin3vAKa4hhn5GD4e478FNKRD58Q7qF3AhVTBNPIl1m4EF
X7sqI6cXUDG4BjpS70ZRWF2x51ZTiq7DLTV/gGw2okfVjoWjzQY0ebrLd4IoNs80
lIHmXxa+JdwB6WupCUzKCKLcPsX/yPAmswPNGAuIMAv+PWhUUSMVtzOZldnlogGM
hbJ9UD2txFGGh9WoYc2vgX9KAaKryXcC6QMabv7JJU24HEJJDgbJEvtFM5PS8QMF
bXIZsYgICWpQXVChBbduXo9sD2TUDWYAniNaaw4LKxPRG+Ix4HAqkh1oNOLojO30
DO3r1/62FKE5/ykg3iSMTDR0iOES/leXCCIO9fRJT8+eucxyOQoY5ti7tjt1wm3H
nTB+Rz3E/E2qeLs2PN82aseccm1G06pmsMCUiWtmSV6HjdO2XufYprrGLSu0RrT3
sz5WHGUOY2iO40xHhSiXg3TcLZRpv30DQzxoUrx9Ff//rXLFznh+MksuvVD2roUR
BGz/en31FxAcBoex9nNraeOekbFen5b7Xrq9wnzM5xZvJN2QYB3vS0khz/ZgFyy5
444ALa9gwb29FZCfA4m59S2QoB8uPQGM+8gnusE6J8y4fvI59ugafidIkt86dZ3m
FsEME5XNmBGdNEo2flRVFfpG1IWds2Ba3IsdbYd9nzmbBW7/n0InVRDrIg==
=9QWY
-----END PGP PUBLIC KEY BLOCK-----
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
torch==1.11.0
transformers==2.1.0
cython==0.29.28
chainer==7.8.1
cupy-cuda110==7.8.0
scikit_learn==1.0.2
progressbar==2.5
Binary file removed text_classification/__pycache__/__init__.cpython-35.pyc
Binary file not shown.
Binary file removed text_classification/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file removed text_classification/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file removed text_classification/__pycache__/nets.cpython-35.pyc
Binary file not shown.
Binary file removed text_classification/__pycache__/nets.cpython-36.pyc
Binary file not shown.
Binary file removed text_classification/__pycache__/nets.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.