Requirements
tensorflow==1.15.2
tensor2tensor==1.14
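The data-loading step below also pulls the dataset from Azure ML, so the Azure ML SDK (azureml-core) is needed as well. A minimal environment setup, assuming pip (exact builds may vary):

pip install tensorflow==1.15.2 tensor2tensor==1.14 azureml-core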
Import Packages
import os
import re
import shutil

import numpy as np
import pandas as pd
import tensorflow as tf

from azureml.core import Workspace, Dataset  # used to pull the SIG dataset below
from tensor2tensor import problems
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_problems
from tensor2tensor.utils import registry
Enable TF Eager execution
tfe = tf.contrib.eager
tfe.enable_eager_execution()
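As an optional sanity check, you can confirm that eager mode is actually active before going further:

print(tf.executing_eagerly())  # should print True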
Other setup
Modes = tf.estimator.ModeKeys
#%%
Required Folder Creation
HOME_PATH = "C:/Raju/Translation_model/Translation_t2t/t2t/"
data_dir = os.path.expanduser(HOME_PATH + "data")  # This folder contains the training data
tmp_dir = os.path.expanduser(HOME_PATH + "tmp")  # This folder contains temporary data, if any
train_dir = os.path.expanduser(HOME_PATH + "train")  # This folder contains the model checkpoints
export_dir = os.path.expanduser(HOME_PATH + "export")  # This folder contains the exported model for production
translations_dir = os.path.expanduser(HOME_PATH + "translation")  # This folder contains all translated sequences
event_dir = os.path.expanduser(HOME_PATH + "event")  # This folder contains evaluation events (e.g., for BLEU scoring)
usr_dir = os.path.expanduser(HOME_PATH + "user")  # This folder contains the custom data we want to add
checkpoint_dir = os.path.expanduser(HOME_PATH + "checkpoints")  # This folder contains pretrained checkpoints
#%%
# Remove data and checkpoints left over from previous runs (ignore_errors
# keeps this from failing when the folders do not exist yet)
shutil.rmtree(data_dir, ignore_errors=True)
shutil.rmtree(train_dir, ignore_errors=True)
#%%
Creating folders
tf.io.gfile.makedirs(data_dir)
tf.io.gfile.makedirs(tmp_dir)
tf.io.gfile.makedirs(export_dir)
tf.io.gfile.makedirs(translations_dir)
tf.io.gfile.makedirs(train_dir)
tf.io.gfile.makedirs(event_dir)
tf.io.gfile.makedirs(usr_dir)
tf.io.gfile.makedirs(checkpoint_dir)
#%%
@registry.register_problem
class translationsig(text_problems.Text2TextProblem):
  """Predict the RX SIG from a standardized SIG."""

  @property
  def approx_vocab_size(self):
    return 2**14  # ~16k subword tokens

  @property
  def is_generate_per_split(self):
    # generate_data will shard the data into TRAIN and EVAL for us.
    return True

  @property
  def dataset_splits(self):
    """Splits of data to produce and number of output shards for each."""
    # 80% training data, 20% evaluation data
    return [{
        "split": problem.DatasetSplit.TRAIN,
        "shards": 8,
    }, {
        "split": problem.DatasetSplit.EVAL,
        "shards": 2,
    }]
  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    del data_dir
    del tmp_dir
    del dataset_split
    # Pull the SIG dataset from Azure ML. subscription_id, resource_group and
    # workspace_name must be set to your own Azure ML workspace values.
    workspace = Workspace(subscription_id, resource_group, workspace_name)
    dataset = Dataset.get_by_name(workspace, name='Sample_DL')
    sig_data = dataset.to_pandas_dataframe()
    print(sig_data.shape)

    ## Data pre-processing
    ## Select the source and target columns and drop duplicate pairs
    sig_data = sig_data[['Standardized_SIG', 'IC+_Pharmacist SIG']]
    sig_data = sig_data.drop_duplicates()
    # Drop missing values before applying the string operations below
    sig_data = sig_data.dropna()

    ## Clean the column names so they are valid attribute names
    sig_data.columns = sig_data.columns.str.replace(' ', '_')
    sig_data.columns = sig_data.columns.str.replace('+', '', regex=False)

    ## Data cleaning: drop the trailing character/quote and surrounding whitespace
    sig_data.Standardized_SIG = sig_data.Standardized_SIG.map(lambda x: re.sub(r'.$', "", x))
    sig_data.IC_Pharmacist_SIG = sig_data.IC_Pharmacist_SIG.map(lambda x: re.sub(r'"$', "", x))
    sig_data.Standardized_SIG = sig_data.Standardized_SIG.str.strip()
    sig_data.IC_Pharmacist_SIG = sig_data.IC_Pharmacist_SIG.str.strip()

    ## Normalize casing
    sig_data.Standardized_SIG = sig_data.Standardized_SIG.str.upper()
    sig_data.IC_Pharmacist_SIG = sig_data.IC_Pharmacist_SIG.str.upper()

    ## Drop empty or punctuation-only targets
    sig_data = sig_data[sig_data.IC_Pharmacist_SIG != '.']
    sig_data = sig_data[sig_data.IC_Pharmacist_SIG != '']
    sig_data = sig_data[sig_data.IC_Pharmacist_SIG != '...']

    ## Normalize common target tokens
    sig_data.IC_Pharmacist_SIG = sig_data.IC_Pharmacist_SIG.apply(lambda x: re.sub(r'\bTAKE\b', "Tk", x))
    sig_data.IC_Pharmacist_SIG = sig_data.IC_Pharmacist_SIG.apply(lambda x: re.sub(r'\bONE\b', "1", x))
    print("Final shape: {}".format(sig_data.shape))

    for sig in range(len(sig_data)):
      yield {
          "inputs": sig_data.Standardized_SIG.iloc[sig],
          "targets": sig_data.IC_Pharmacist_SIG.iloc[sig],
      }
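The decorator registers the class under its class name, so an optional sanity check is to confirm the problem is now discoverable by string:

assert "translationsig" in problems.available()  # registered by the decorator above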
#%%
Model Name and Parameter Selection
PROBLEM = "translationsig"    # Custom ESIG translation problem
MODEL = "transformer"         # Our model
HPARAMS = "transformer_base"  # Default hyperparameters for the model
# If you have only one GPU, use transformer_big_single_gpu instead
#%%
Setup helper functions for encoding and decoding
# Note: both helpers use the global `encoders`, created after data generation below.
def encode(input_str, output_str=None):
  """Input str to features dict, ready for inference."""
  inputs = encoders["inputs"].encode(input_str) + [1]  # add EOS id
  batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
  return {"inputs": batch_inputs}

def decode(integers):
  """List of ints to str."""
  integers = list(np.squeeze(integers))
  if 1 in integers:
    integers = integers[:integers.index(1)]  # truncate at EOS
  return encoders["inputs"].decode(np.squeeze(integers))
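Once `encoders` exists (after the data-generation cell below has been run), the helpers can be exercised with a quick round trip; this is only an illustrative check:

# Example round trip (run after `encoders` is built below):
#   features = encode("TAKE 1 TABLET 2 TIMES DAILY")
#   print(decode(features["inputs"]))  # should reproduce the input string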
#%%
Data Generation
print('Generating data')
t2t_problem = problems.problem(PROBLEM)
t2t_problem.generate_data(data_dir, tmp_dir)
print("Data generated.")
#%%
Get the encoders from the problem
encoders = t2t_problem.feature_encoders(data_dir)
example = next(tfe.Iterator(t2t_problem.dataset(Modes.TRAIN, data_dir)))
inputs = [int(x) for x in example["inputs"].numpy()]    # Cast to ints.
targets = [int(x) for x in example["targets"].numpy()]  # Cast to ints.

# Example inputs as an int-tensor.
print("Inputs, encoded:")
print(inputs)
# Example inputs as a sentence.
print("Inputs, decoded:")
print(decode(inputs))
# Example targets as an int-tensor.
print("Targets, encoded:")
print(targets)
# Example targets as a sentence.
print("Targets, decoded:")
print(decode(targets))
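It can also be useful to confirm the learned subword vocabulary landed near the requested ~16k (optional):

print(encoders["inputs"].vocab_size)  # should be close to approx_vocab_size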
#%%
from tensor2tensor import models  # importing this module registers all T2T models
# print(problems.available())  # Show all problems
print(registry.list_models())  # Show all registered models
#%%
from tensor2tensor.utils.trainer_lib import create_hparams
Init Hparams object from T2T Problem
hparams = create_hparams(HPARAMS)
# Optionally tie hparams to the problem: create_hparams(HPARAMS, data_dir=data_dir, problem_name=PROBLEM)
print(hparams.to_json())
#%%
Make Changes to Hparams
hparams.batch_size = 500  # for text problems, T2T measures batch size in tokens, not examples
hparams.learning_rate_warmup_steps = 4500
hparams.learning_rate = 0.4
save_checkpoints_steps = 1000  # used by the run config below
print(hparams.to_json())
#%%
from tensor2tensor.utils.trainer_lib import create_run_config, create_experiment
Init Run Config for Model Training
RUN_CONFIG = create_run_config(
    model_dir=train_dir,  # Location where the model checkpoints are stored
    model_name=MODEL,
    save_checkpoints_steps=save_checkpoints_steps  # How often to save checkpoints
    # More parameters are available in this function for controlling checkpointing and other behavior.
)
# Create Tensorflow Experiment Object
tensorflow_exp_fn = create_experiment(
    run_config=RUN_CONFIG,
    hparams=hparams,
    model_name=MODEL,
    problem_name=PROBLEM,
    data_dir=data_dir,
    train_steps=400,  # Total number of training steps across all epochs
    eval_steps=100    # Number of steps to run for each evaluation
)
tensorflow_exp_fn.train_and_evaluate()
#%%
Build the Translation Model in PREDICT Mode
translate_model = registry.model(MODEL)(hparams, Modes.PREDICT)
#%%
Restore the Latest Checkpoint for Inference
# The vocab file lives in data_dir and is already loaded via the encoders above.
vocab_name = "vocab.sig_translator.16384.subwords"
vocab_file = os.path.join(data_dir, vocab_name)
# A pretrained checkpoint (e.g., "transformer_esig" under checkpoint_dir) could
# also be restored here; we use the latest checkpoint written by training above.
ckpt_path = tf.train.latest_checkpoint(train_dir)
print(ckpt_path)
#%%
def translate(inputs):
  encoded_inputs = encode(inputs)
  with tfe.restore_variables_on_create(ckpt_path):
    model_output = translate_model.infer(encoded_inputs)["outputs"]
  return decode(model_output)
#%%
inputs = "TAKE 1 TABLET 2 TIMES DAILY"
outputs = translate(inputs)
print("Inputs: %s" % inputs)
print("Outputs: %s" % outputs)