Skip to content

Commit a436c61

Browse files
committed
Prepare code for public release
0 parents  commit a436c61

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+4007
-0
lines changed

.dockerignore

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.pth
2+
*.wav
3+
*.dac
4+
tests/
5+
runs/

.gitattributes

Whitespace-only changes.

.gitignore

+176
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
*.py,cover
51+
.hypothesis/
52+
.pytest_cache/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
target/
76+
77+
# Jupyter Notebook
78+
.ipynb_checkpoints
79+
80+
# IPython
81+
profile_default/
82+
ipython_config.py
83+
84+
# pyenv
85+
.python-version
86+
87+
# pipenv
88+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
90+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
91+
# install all needed dependencies.
92+
#Pipfile.lock
93+
94+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95+
__pypackages__/
96+
97+
# Celery stuff
98+
celerybeat-schedule
99+
celerybeat.pid
100+
101+
# SageMath parsed files
102+
*.sage.py
103+
104+
# Environments
105+
.env
106+
.venv
107+
env/env.sh
108+
venv/
109+
env.bak/
110+
venv.bak/
111+
112+
# Spyder project settings
113+
.spyderproject
114+
.spyproject
115+
116+
# Rope project settings
117+
.ropeproject
118+
119+
# mkdocs documentation
120+
/site
121+
122+
# mypy
123+
.mypy_cache/
124+
.dmypy.json
125+
dmypy.json
126+
127+
# Pyre type checker
128+
.pyre/
129+
130+
# Files created by experiments
131+
output/
132+
snapshot/
133+
*.m4a
134+
*.wav
135+
notebooks/scratch.ipynb
136+
notebooks/inspect.ipynb
137+
notebooks/effects.ipynb
138+
notebooks/*.ipynb
139+
notebooks/*.gif
140+
notebooks/*.wav
141+
notebooks/*.mp4
142+
*runs/
143+
boards/
144+
samples/
145+
*.ipynb
146+
147+
results.json
148+
metrics.csv
149+
mprofile_*
150+
mem.png
151+
152+
results/
153+
mprofile*
154+
*.png
155+
# do not ignore the test wav files
156+
!tests/audio/short_test_audio.wav
157+
!tests/audio/output.wav
158+
*/.DS_Store
159+
.DS_Store
160+
env.sh
161+
_codebraid/
162+
**/*.html
163+
**/*.exec.md
164+
flagged/
165+
log.txt
166+
ckpt/
167+
.syncthing*
168+
tests/assets/
169+
archived/
170+
171+
*_remote_module_*
172+
*.zip
173+
*.pth
174+
encoded_out/
175+
recon/
176+
recons/

.pre-commit-config.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
repos:
2+
- repo: https://github.com/asottile/reorder_python_imports
3+
rev: v2.5.0
4+
hooks:
5+
- id: reorder-python-imports
6+
- repo: https://github.com/psf/black
7+
rev: 23.1.0
8+
hooks:
9+
- id: black
10+
language_version: python3
11+
- repo: https://github.com/pre-commit/pre-commit-hooks
12+
rev: v4.0.1
13+
hooks:
14+
- id: end-of-file-fixer
15+
- id: trailing-whitespace

Dockerfile

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
2+
3+
COPY . /app
4+
WORKDIR /app
5+
6+
RUN apt update && apt install -y git
7+
# install the package
8+
RUN pip install .
9+
10+
# cache the model
11+
RUN python3 -m dac download

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023-present, Descript
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Descript Audio Codec (.dac)
2+
3+
<!-- ![](https://static.arxiv.org/static/browse/0.3.4/images/icons/favicon-32x32.png) -->
4+
5+
6+
This repository contains training and inference scripts
7+
for the Descript Audio Codec (.dac), a high fidelity general
8+
neural audio codec.
9+
10+
11+
## Usage
12+
13+
### Installation
14+
```
15+
git clone https://github.com/descriptinc/descript-audio-codec
16+
cd descript-audio-codec
17+
pip install .
18+
```
19+
20+
### Compress audio
21+
```
22+
python3 -m dac encode /path/to/input --output /path/to/output/codes
23+
```
24+
25+
This command will create `.dac` files with the same name as the input files.
26+
It will also preserve the directory structure relative to input root and
27+
re-create it in the output directory. Please use `python3 -m dac encode --help`
28+
for more options.
29+
30+
### Reconstruct audio from compressed codes
31+
```
32+
python3 -m dac decode /path/to/output/codes --output /path/to/reconstructed_input
33+
```
34+
35+
This command will create `.wav` files with the same name as the input files.
36+
It will also preserve the directory structure relative to input root and
37+
re-create it in the output directory. Please use `python3 -m dac decode --help`
38+
for more options.
39+
40+
### Docker image
41+
We provide a Dockerfile to build a Docker image with all the necessary
42+
dependencies.
43+
1. Building the image.
44+
```
45+
docker build -t dac .
46+
```
47+
2. Using the image.
48+
49+
Usage on CPU:
50+
```
51+
docker run dac <command>
52+
```
53+
54+
Usage on GPU:
55+
```
56+
docker run --gpus=all dac <command>
57+
```
58+
59+
`<command>` can be one of the compression and reconstruction commands listed
60+
above. For example, if you want to run compression,
61+
62+
```
63+
docker run --gpus=all dac python3 -m dac encode ...
64+
```
65+
66+
67+
## Training
68+
The baseline model configuration can be trained using the following commands.
69+
70+
### Pre-requisites
71+
Please install the package in editable mode with the `dev` extras:
72+
```
73+
pip install -e ".[dev]"
74+
```
75+
76+
77+
### Single GPU training
78+
```
79+
export CUDA_VISIBLE_DEVICES=0
80+
python scripts/train.py --args.load conf/ablations/baseline.yml --save_path runs/baseline/
81+
```
82+
83+
### Multi GPU training
84+
```
85+
export CUDA_VISIBLE_DEVICES=0,1
86+
torchrun --nproc_per_node gpu scripts/train.py --args.load conf/ablations/baseline.yml --save_path runs/baseline/
87+
```
88+
89+
## Testing
90+
We provide two test scripts to test CLI + training functionality. Please
91+
make sure that the training pre-requisites are satisfied before launching these
92+
tests. To launch these tests please run
93+
```
94+
python -m pytest tests
95+
```

conf/1gpu.yml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
$include:
2+
- conf/base.yml
3+
4+
batch_size: 12
5+
val_batch_size: 12
6+
num_workers: 4

conf/ablations/baseline.yml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
$include:
2+
- conf/base.yml
3+
- conf/1gpu.yml

conf/ablations/diff-mb.yml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
$include:
2+
- conf/base.yml
3+
- conf/1gpu.yml
4+
5+
Discriminator.sample_rate: 44100
6+
Discriminator.fft_sizes: [2048, 1024, 512]
7+
Discriminator.bands:
8+
- [0.0, 0.05]
9+
- [0.05, 0.1]
10+
- [0.1, 0.25]
11+
- [0.25, 0.5]
12+
- [0.5, 1.0]
13+
14+
15+
# re-weight lambdas to make up for
16+
# lost discriminators vs baseline
17+
lambdas:
18+
mel/loss: 15.0
19+
adv/feat_loss: 5.0
20+
adv/gen_loss: 1.0
21+
vq/commitment_loss: 0.25
22+
vq/codebook_loss: 1.0

conf/ablations/equal-mb.yml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
$include:
2+
- conf/base.yml
3+
- conf/1gpu.yml
4+
5+
Discriminator.sample_rate: 44100
6+
Discriminator.fft_sizes: [2048, 1024, 512]
7+
Discriminator.bands:
8+
- [0.0, 0.2]
9+
- [0.2, 0.4]
10+
- [0.4, 0.6]
11+
- [0.6, 0.8]
12+
- [0.8, 1.0]
13+
14+
15+
# re-weight lambdas to make up for
16+
# lost discriminators vs baseline
17+
lambdas:
18+
mel/loss: 15.0
19+
adv/feat_loss: 5.0
20+
adv/gen_loss: 1.0
21+
vq/commitment_loss: 0.25
22+
vq/codebook_loss: 1.0

conf/ablations/no-adv.yml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
$include:
2+
- conf/base.yml
3+
- conf/1gpu.yml
4+
5+
lambdas:
6+
mel/loss: 1.0
7+
waveform/loss: 1.0
8+
vq/commitment_loss: 0.25
9+
vq/codebook_loss: 1.0

0 commit comments

Comments
 (0)