3 changes: 2 additions & 1 deletion .gitignore
@@ -135,4 +135,5 @@ dmypy.json
/.idea/

.DS_Store
wandb
wandb
*.sh
14 changes: 14 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,14 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": true,
"python.testing.pytestEnabled": false,
"python.testing.unittestArgs": [
"-v",
"-s",
"./tests",
"-p",
"*test*.py"
]
}
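
These VS Code settings switch test discovery from pytest to the built-in unittest runner. A hedged reading of what the `unittestArgs` above amount to, expressed as a small Python sketch (VS Code drives this internally; the snippet is illustrative, not part of the diff):

# Equivalent of "-v -s ./tests -p *test*.py", run from the repository root.
import unittest
suite = unittest.defaultTestLoader.discover("./tests", pattern="*test*.py")
unittest.TextTestRunner(verbosity=2).run(suite)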
1 change: 1 addition & 0 deletions deepsoftlog/algebraic_prover/algebras/sdd_algebra.py
@@ -162,3 +162,4 @@ def zero(self) -> SddFormula:

def reset(self):
self.all_facts = FastList()
self.manager.garbage_collect()
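
This one-line addition makes `reset()` also trigger the SDD manager's garbage collector, so formula nodes from a finished proof are reclaimed rather than accumulating across queries. A minimal sketch of the intended effect, assuming the usual query loop (the names `queries`, `prove`, and `algebra` are illustrative, not from the diff):

# Hypothetical sketch: resetting between queries now also reclaims dead
# SDD nodes, keeping the manager's memory bounded over many proofs.
for query in queries:
    result = prove(query, algebra)  # builds SDD formulas internally
    algebra.reset()                 # clears facts and garbage-collects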
5 changes: 4 additions & 1 deletion deepsoftlog/data/__init__.py
@@ -3,7 +3,6 @@
from .query import Query
from ..logic.soft_term import SoftTerm, TensorTerm


def load_tsv_file(filename: str):
with open(filename, "r") as f:
return [line.strip().split("\t") for line in f.readlines()]
@@ -19,3 +18,7 @@ def data_to_prolog(rows, name="r", **kwargs):

def to_prolog_image(img):
return SoftTerm(Expr("lenet5", TensorTerm(img)))


def to_prolog_text(text):
return SoftTerm(Expr("roberta", TensorTerm(text)))
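
`to_prolog_text` mirrors `to_prolog_image` directly above it: the raw input is wrapped in a `TensorTerm` under a soft functor naming the encoder (`roberta` for text, `lenet5` for images). For example (the sample sentence is illustrative):

term = to_prolog_text("the cat sat on the mat")
# term == SoftTerm(Expr("roberta", TensorTerm("the cat sat on the mat"))),
# i.e. a soft constant that the embedding store will encode with RoBERTa.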
18 changes: 15 additions & 3 deletions deepsoftlog/embeddings/embedding_store.py
@@ -7,7 +7,7 @@
from ..parser.vocabulary import Vocabulary
from .distance import embedding_similarity
from .initialize_vector import Initializer
from ..logic.soft_term import TensorTerm
from ..logic.soft_term import TensorTerm, TextTerm
from .nn_models import EmbeddingFunctor
from deepsoftlog.algebraic_prover.terms.expression import Expr

@@ -53,7 +53,7 @@ def forward(self, term: Expr):
return e

def _embed_constant(self, term: Expr):
if isinstance(term, TensorTerm):
if isinstance(term, TensorTerm) or isinstance(term, TextTerm):
return term.get_tensor().to(self.device)

name = term.functor
@@ -70,16 +70,28 @@ def _embed_functor(self, functor: Expr):

def clear_cache(self):
self._cache = dict()
if "('roberta', 1)" in self.functor_embeddings.keys():
self.functor_embeddings["('roberta', 1)"].clear_cache()
if "('text', 1)" in self.functor_embeddings.keys():
self.functor_embeddings["('text', 1)"].clear_cache()

def to(self, device):
self.device = device
return super().to(device)

def get_soft_unification_matrix(self, distance_metric: str, names):
n = len(names)
matrix = torch.zeros(n, n)
for i, c1 in enumerate(names):
for j, c2 in enumerate(names):
e1, e2 = self.constant_embeddings[c1], self.constant_embeddings[c2]
matrix[i, j] = embedding_similarity(e1, e2, distance_metric) # log probabilities
return matrix.detach().numpy()
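
The new `get_soft_unification_matrix` computes all pairwise similarities between stored constant embeddings and returns them as a detached NumPy array of log-probabilities. A hedged usage sketch (the constant names and the metric string are assumptions, not from the diff):

# Hypothetical inspection of learned soft-unification scores.
names = ["paris", "london", "france"]
log_sims = store.get_soft_unification_matrix("l2", names)
# log_sims[i, j] is the log-probability that names[i] soft-unifies with
# names[j]; diagonal entries should be near 0.0 (probability 1).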

def create_embedding_store(config, vocab_sources: Iterable) -> EmbeddingStore:
ndim = config['embedding_dimensions']
vocabulary = create_vocabulary(vocab_sources)
initializer = Initializer(EmbeddingFunctor, config['embedding_initialization'], ndim)
initializer = Initializer(EmbeddingFunctor, config['embedding_initialization'], ndim, config.get("text_embedding_mode"), config.get("freeze_layers"))
store = EmbeddingStore(ndim, initializer, vocabulary)
return store
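
`create_embedding_store` now forwards two extra config keys to the `Initializer`. A hedged sketch of a config that exercises them (the key names come from the diff; the values are assumptions):

config = {
    "embedding_dimensions": 256,
    "embedding_initialization": "normal",  # value assumed
    "text_embedding_mode": "LM",           # "LM" selects RobertaBase (see initialize_vector.py below)
    "freeze_layers": 10,                   # layers to freeze in pretrained text encoders
}
store = create_embedding_store(config, vocab_sources=["train.pl"])  # source assumed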

14 changes: 11 additions & 3 deletions deepsoftlog/embeddings/initialize_vector.py
@@ -3,18 +3,22 @@
from torch import Tensor
from torch import nn

from ..embeddings.nn_models import LeNet5
from ..embeddings.nn_models import LeNet5, RobertaBase, BaselineTextEmbedder

SPECIAL_MODELS = {
("lenet5", 1): LeNet5,
}

SPECIAL_PRETRAINED_MODELS = {
("roberta", 1): RobertaBase,
}

class Initializer:
def __init__(self, model: nn.Module, init_mode: str, ndim: int):
def __init__(self, model: nn.Module, init_mode: str, ndim: int, text_embedding_mode: str = None, freeze_layers: int = 12):
self.ndim = ndim
self.init_mode = init_mode
self.model = model
self.text_embedding_mode = text_embedding_mode
self.freeze_layers = freeze_layers

def __call__(self, x) -> Tensor | nn.Module:
if isinstance(x, str):
@@ -36,8 +40,12 @@ def _initialize_constant(self, name: str) -> Tensor:
return embedding

def _initialize_functor(self, name: str, arity: int) -> nn.Module:
if name == "text" and arity == 1:
return RobertaBase(self.ndim) if self.text_embedding_mode == "LM" else BaselineTextEmbedder(self.ndim)
if (name, arity) in SPECIAL_MODELS:
return SPECIAL_MODELS[(name, arity)](self.ndim)
if (name, arity) in SPECIAL_PRETRAINED_MODELS: # Pretrained models have the freeze_layers argument
return SPECIAL_PRETRAINED_MODELS[(name, arity)](self.ndim, freeze_layers=self.freeze_layers)
return self.model(arity, self.ndim)
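
Taken together, `_initialize_functor` now dispatches text two ways: `text/1` yields `RobertaBase` when `text_embedding_mode == "LM"` and `BaselineTextEmbedder` otherwise, while `roberta/1` always resolves to the pretrained model with `freeze_layers` applied. A hedged sketch of the dispatch (the argument values and the `parent/2` functor are illustrative):

init = Initializer(EmbeddingFunctor, "normal", 256,
                   text_embedding_mode="LM", freeze_layers=10)
init._initialize_functor("text", 1)     # -> RobertaBase(256)
init._initialize_functor("roberta", 1)  # -> RobertaBase(256, freeze_layers=10)
init._initialize_functor("parent", 2)   # -> EmbeddingFunctor(2, 256), the default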

