Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,3 @@ __pycache__/
.DS_Store
Notebooks/
config/

BAFU_e_CMYK_pos_hoch.pdf
my_figure.pdf
my_figure.png
6 changes: 5 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
FROM python:3.9
FROM python:3.11
COPY requirements.txt app/requirements.txt
WORKDIR /app
RUN pip install -r requirements.txt
# Install the OpenJDK 17 JRE and refresh the CA certificate store.
# The last command must not end with a line-continuation backslash: a trailing
# "\" would fold the following COPY instruction into this shell command.
RUN apt-get update && \
    apt-get install -y openjdk-17-jre && \
    apt-get clean && \
    update-ca-certificates -f
COPY . /app
EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000" ,"--reload"]
16 changes: 2 additions & 14 deletions backend/app.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
from fastapi import FastAPI
import joblib
import numpy as np

from descriptors import get_maccs_fingerprints

model_pipeline = joblib.load('pepper_pipeline_model.pkl')
from predict_target_endpoint import predict
app = FastAPI()

@app.get("/")
Expand All @@ -13,11 +8,4 @@ async def read_root():

# GET /predict/ handler: receives the `smiles` query parameter as one string
# and splits it on commas before predicting.
# NOTE(review): this span shows the pre-change body (MACCS fingerprints fed to
# the joblib pipeline) followed by the post-change one-liner that delegates to
# predict(). Read literally, the first `return` wins and the final line is
# unreachable; confirm which version is the intended one.
@app.get('/predict/')
async def serve_foo(smiles: str):
# One list entry per comma-separated SMILES string
smiles_list = smiles.split(',')

# Calculate the MACCS fingerprints for the input data
X = get_maccs_fingerprints(smiles_list)

# Use the pipeline to make predictions
predicted_logB = model_pipeline.predict(X)
# Back-transform with 10**predicted_logB, scale by 100, round, and return a plain list
return np.round((10**predicted_logB ) *100).tolist()
# Post-change path: hand the split SMILES list to predict() and return its result
return predict(smiles.split(','))
6 changes: 0 additions & 6 deletions backend/descriptors.py

This file was deleted.

Binary file not shown.
Binary file removed backend/pepper_pipeline_model.pkl
Binary file not shown.
26 changes: 26 additions & 0 deletions backend/predict_target_endpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pepper_lab.predict import Predict
import pandas as pd

def predict(input_smiles):
    """Predict breakthrough and confidence for a list of SMILES strings.

    The SMILES are wrapped in a one-column DataFrame ("SMILES") and passed to
    the pickled pepper-lab model through ``Predict.predict_endpoint``.

    Returns:
        A dict mapping column name to a list of values, holding the model's
        compound-name column, its SMILES column, 'Breakthrough (%)' and
        'Confidence 0-1'. Missing values are replaced by empty strings.
    """
    smiles_frame = pd.DataFrame(input_smiles, columns=["SMILES"])
    predictor = Predict(renku=True)
    results = predictor.predict_endpoint(
        'pepper_object_wwtp_optimized_trained_model.pkl',
        input_model_format='pickle',
        input_smiles=smiles_frame,
        input_smiles_type='dataframe',
    )
    model = predictor.model

    # Breakthrough: back-transform the predicted logB, scale to percent, 1 decimal.
    results['Breakthrough (%)'] = round((10**results['logB_predicted'])*100, 1)

    # Confidence: the '<target_variable_std_name>_predicted' column, 2 decimals.
    std_column = '{}_predicted'.format(model.target_variable_std_name)
    results['Confidence 0-1'] = round(results[std_column], 2)

    # Keep only the columns that the app displays.
    shown_columns = [
        model.compound_name,
        model.smiles_name,
        'Breakthrough (%)',
        'Confidence 0-1',
    ]
    results = results[shown_columns]
    results.fillna("", inplace=True)
    return results.to_dict(orient="list")
11 changes: 5 additions & 6 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
fastapi==0.115.2
joblib==1.4.2
molfeat==0.10.0
numpy==1.26.0
scikit-learn==1.3.2
uvicorn==0.32.0
fastapi>=0.115.2,<1.0.0
pandas>=2.2.3,<3.0.0
pepper-lab>=1.1.0
rdkit>=2024.9.5,<2025.0.0
uvicorn>=0.32.0,<1.0.0
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: '3.9'
version: '3.12'
services:
frontend:
build: streamlit
Expand Down
11 changes: 5 additions & 6 deletions streamlit/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
FROM python:3.9

FROM python:3.12
COPY requirements.txt app/requirements.txt
WORKDIR /app

RUN pip install -r requirements.txt
RUN apt-get update && \
apt-get install -y openjdk-17-jre && \
apt-get clean && \
update-ca-certificates -f
COPY . /app

EXPOSE 8501

ENTRYPOINT ["streamlit","run"]

CMD ["app.py"]
62 changes: 46 additions & 16 deletions streamlit/app.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,69 @@
import pandas as pd
import streamlit as st
import requests
import pandas as pd
from rdkit.Chem import PandasTools
import requests

from utils import image_from_mol

@st.cache_data
def convert_df(df):
    """Serialize *df* to CSV text and return it as UTF-8 encoded bytes.

    Wrapped in ``st.cache_data`` so the conversion is not recomputed on
    every rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
example_csv = pd.read_csv('test_pepper_app.csv')

def main():
# Streamlit page body: shows the title and intro text, offers an example CSV
# in the sidebar and, once a CSV is uploaded, requests predictions from the
# backend service and displays them.
# NOTE(review): this span interleaves pre-change lines (working on
# input_data / predictions_df) with post-change lines (working on df); the
# paired statements below are before/after versions of the same step.

# Streamlit app title
st.title("PEPPER: an app to Predict Environmental Pollutant PERsistence ")

st.markdown("""
Currently we support the prediction of the expected percentage breakthrough of micropollutants from
conventional wastewater treatment, that is, the percentage that potentially escapes the plant
without being successfully removed. Visit section [Learn more](https://pepper-app.streamlit.app/Learn_more)
for further details.
""")

# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file with chemical substance data", type="csv")


# CSV bytes of the module-level example_csv frame, produced by the cached convert_df
csv = convert_df(example_csv)

# Sidebar button that downloads those bytes as "pepper_example.csv"
st.sidebar.download_button(
label="Download example file",
data=csv,
file_name="pepper_example.csv",
mime="text/csv",
)

if uploaded_file is not None:
# Load the uploaded data
input_data = pd.read_csv(uploaded_file)
df = pd.read_csv(uploaded_file)

# Show the input data
st.write("Uploaded data:", input_data)
st.write(" ### Uploaded data:", df)

# Pre-change request: the SMILES column is comma-joined into the "smiles" query parameter
response = requests.request("get", "http://backend:8000/predict/",
params={"smiles": ",".join(input_data.SMILES)})

# Show it as a dataframe
predictions_df = pd.DataFrame(input_data)
predictions_df['Breakthrough (%)'] = response.json()
print('Start predictions')
# Calculate using pepper-lab
# NOTE(review): no timeout and no HTTP status check; the decoded JSON is used as-is
response = requests.request("get", "http://backend:8000/predict/", params={"smiles": ",".join(df.SMILES)}).json()
# Replace df with a frame built from the decoded JSON response
df = pd.DataFrame.from_dict(response)

# Add a 'Structure' molecule column derived from 'SMILES', convert each entry
# to a PNG data URI with image_from_mol, then drop the raw SMILES column
PandasTools.AddMoleculeColumnToFrame(df, 'SMILES', 'Structure')
df["Structure"] = df["Structure"].apply(image_from_mol)
df.drop(columns='SMILES', inplace=True)

# Show the predictions
st.write("Predictions:", predictions_df)

# PandasTools.AddMoleculeColumnToFrame(predictions_df, smilesCol='SMILES')
# predictions_df.rename(columns={'ROMol': 'Structure'})
# predictions_df.drop(columns='SMILES', inplace=True)
st.markdown(""" ### Predictions: """)
# Configure the 'Structure' column as an image column for st.dataframe
config = {
"Structure": st.column_config.ImageColumn(width="medium"),
}
st.dataframe(df, column_config=config, row_height=100)

# Pre-change rendering: the predictions frame emitted as raw, unescaped HTML
st.markdown(predictions_df.to_html(escape=False), unsafe_allow_html=True)
# st.write("""
# 📢⚠️ The frame below shows the predictions along chemical structures.
# We are working to give you the chemical structures as part of the file to be downloaded. """)
#
# st.markdown(predictions_df.to_html(escape=False), unsafe_allow_html=True)


if __name__ == '__main__':
Expand Down
1 change: 0 additions & 1 deletion streamlit/packages.txt

This file was deleted.

13 changes: 4 additions & 9 deletions streamlit/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
joblib==1.4.2
molfeat==0.10.0
pandas==2.2.3
streamlit==1.38.0
scipy==1.12.0
matplotlib==3.6.2
numpy==1.26.0
scikit-learn==1.3.0
rdkit==2024.03.5
pandas>=2.2.3,<3.0.0
requests
rdkit>=2024.9.5,<2025.0.0
streamlit>=1.41.1
125 changes: 125 additions & 0 deletions streamlit/test_pepper_app.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
SMILES,Compound
O=c1[nH]cnc2c1ncn2C1CC(O)C(CO)O1,2'-Deoxyinosine
CC(C)C(O)(CC(=O)O)C(=O)O,2-Isopropylmalic acid
OCC1OC(OC2C(CO)OC(O)C(O)C2O)C(O)C(O)C1O,alpha-Lactose
O=P(c1ccccc1)(c1ccccc1)c1ccccc1,triphenylphosphineoxide
CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1)C(C(=O)O)C(C)C,Valsartan
CC(C)(CC(=O)O)CC(=O)O,"3,3-Dimethylglutaric acid"
COCCc1ccc(OCC(O)CNC(C)C)cc1,Metoprolol
NC(Cc1ccc(O)cc1)C(=O)O,L-Tyrosine
O=c1[nH]sc2ccccc12,"1,2-Benzisothiazolin-3-one"
CC(CCC(=O)O)C1CCC2C3CCC4CC(O)CCC4(C)C3CC(O)C12C,Deoxycholic acid
O=C(O)C1CC(=O)N(Cc2ccco2)C1,1-(2-Furylmethyl)-5-oxopyrrolidine-3-carboxylic acid
Cc1ccc(C(N)=O)cn1,6-Methylnicotinamide
CNCCCC(C#N)(c1ccc(OC)c(OC)c1)C(C)C,"2-(3,4-Dimethoxyphenyl)-5-methylamino-2-isopropylvaleronitrile"
O=C(O)C(O)Cc1ccccc1,3-Phenyllactic acid
CC(C)CC(N)C(=O)NC(C(=O)O)C(C)C,Leucylvaline
CCOP(=O)(OCC)OCC,Triethyl phosphate
Nc1c2c(=O)[nH]c(O)cc2nn1-c1ccccc1,"3-amino-2-phenyl-2H-pyrazolo[4,3-c]pyridine-4,6-diol"
CC1CN2CC(C)OB(O1)OC(C)C2,Triisopropanolamine cyclic borate
CC(O)(CC(=O)O)C(=O)O,citramalic acid
N#Cc1cc(Br)c(O)c(Br)c1,Bromoxynil
CCCCNS(=O)(=O)c1ccccc1,N-Butylbenzenesulfonamide
O=c1cc[nH]c2ccccc12,4-Hydroxyquinoline
O=C1CC2(CCCCC2)CN1,"3,3-pentamethylene-4-butyrolactam"
CC(O)COC(C)COC(C)COC(C)COC(C)COC(C)COC(C)COC(C)COC(C)COC(C)COC(C)CO,Undecapropylene glycol
COc1cc(CC(N)C(=O)O)ccc1O,3-Methoxytyrosine
CC(Cc1ccc(C(C)C(=O)O)cc1)C(=O)O,Carboxyibuprofen
NC(=O)c1ccccc1O,Salicylamide
CC(C)CC(NC(=O)CN)C(=O)O,Glycyl-L-leucine
CCCCCC(O)C(O)CC=CCCCCCCCC(=O)O,(+/-)12(13)-DiHOME
CC(CC(=O)O)C(=O)O,Methylsuccinic acid
COc1cc(C=O)cc(OC)c1O,Syringaldehyde
CCC=CCC(O)C(O)C=CC(O)CCCCCCCC(=O)O,"(10e,15z)-9,12,13-Trihydroxyoctadeca-10,15-dienoic acid"
CC(CCl)OP(=O)(OC(C)CCl)OC(C)CCl,Tris(1-chloro-2-propyl)phosphate
CC(N)C12CC3CC(CC(C3)C1)C2,Rimantadine
CC1CC2OC2C=CC(=O)CC(=O)O1,Decarestrictine F
NC(Cc1ccccc1O)C(=O)O,2-Hydroxyphenylalanine
S=c1[nH]c2ccccc2s1,2-Mercaptobenzothiazole
Oc1ccc2ncccc2c1,6-Quinolinol
O=C(O)C1(O)CC(O)C(O)C(O)C1,D-(-)-Quinic acid
O=C(O)c1cc(O)c(O)c(O)c1,Gallic acid
COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1,Biochanin A
Nc1cccc(O)c1,3-Aminophenol
COc1cc(C(=O)O)ccc1O,Vanillic acid
CC12C=CC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO,Prednisolone
COc1cc(O)c(C(=O)c2ccccc2)cc1S(=O)(=O)O,Sulisobenzone
c1ccc2n[nH]nc2c1,Benzotriazole
Cc1nc(-c2ccc(OCC(C)C)c(C#N)c2)sc1C(=O)O,Febuxostat
Cc1cc(O)cc(C)c1S(C)(=O)=O,Methiocarb-TP methiocarb sulfone phenol (M05)
O=CNCc1ccccc1,N-Benzylformamide
OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO,"3,6,9,12,15,18,21,24,27,30,33,36,39,42-Tetradecaoxatetratetracontane-1,44-diol"
NC(=O)N1c2ccccc2C(O)C(O)c2ccccc21,"10,11-Dihydro-10,11-dihydroxycarbamazepine"
CC12CCC3c4ccc(OS(=O)(=O)O)cc4CCC3C1CCC2=O,Estrone sulfate
CCC=CCC=CCC=CCC=CCC=CCCCCCC(=O)O,Docosapentaenoic acid
O=C(O)CC1(C(=O)O)CCCCC1,1-(Carboxymethyl)cyclohexanecarboxylic acid
CC(=O)Nc1ccc(C)cc1C,"N-(2,4-Dimethylphenyl)acetamide"
NC(CC(=O)O)C(=O)NC(Cc1ccccc1)C(=O)O,L-Aspartyl-L-phenylalanine
Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(CC(O)C(O)C(O)CO)c2cc1C,Riboflavin
Cc1oc(C(=O)O)cc(=O)c1C,"5,6-dimethyl-4-oxo-4H-pyran-2-carboxylic acid"
CC(=O)NC(CC(C)C)C(=O)O,N-Acetyl-L-leucine
O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,Diclofenac
OCCN(CCO)CCO,Triethanolamine
Cc1ccncc1,4-Picoline
O=C(NC1CCCCC1)NC1CCCCC1,Dicyclohexylurea
CC(C)(Oc1ccc(Cl)cc1)C(=O)O,Clofibric acid
NCC(=O)N1CCCC1C(=O)O,Glycylproline
OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO,"3,6,9,12,15,18,21,24,27-Nonaoxanonacosane-1,29-diol"
NC(N)=NCCCC(N)C(=O)O,DL-Arginine
CN(C)CC1CCCCC1(O)c1cccc(O)c1,O-Desmethyl-cis-tramadol
CC(=O)Nc1ccc(O)c(C(=O)O)c1,N-Acetyl-5-aminosalicylic acid
CC(C)(C)NC(=O)NCCO,1-(2-Hydroxyethyl)-3-t-butylurea
O=C(O)c1ccc([O-])c(O)c1,"3,4-Dihydroxybenzoate"
Cc1cccc(C(=O)NCC(=O)O)c1,3-Methylhippuric acid
CC(C)CC(NC(=O)OC(C)(C)C)C(N)=O,tert-Butyl N-[1-(aminocarbonyl)-3-methylbutyl]carbamate
CCCCOP(=O)(O)OCCCC,Dibutyl phosphate
Cc1nc(C)c(C)nc1C,"2,3,5,6-Tetramethylpyrazine"
CN(C)CCC=C1c2ccccc2CCc2ccccc21,Amitriptyline
NCC(O)c1ccccc1,Phenylethanolamine
CC(C)NCC(O)COc1ccc(COCCOC(C)C)cc1,Bisoprolol
NC(CCC(=O)O)C(=O)NC(Cc1ccc(O)cc1)C(=O)O,Glutamyltyrosine
COc1ccc(C=O)cc1,4-Methoxybenzaldehyde
COc1cccc(CC(=O)O)c1,3-Methoxyphenylacetic acid
Cc1cc(=O)oc2cc(O)c(O)cc12,"6,7-Dihydroxy-4-methylcoumarin"
NCC1(CC(=O)O)CCCCC1,Gabapentin
O=C(O)c1ccc([N+](=O)[O-])cc1,4-Nitrobenzoic acid
Cc1cc(=O)oc2cc(N)ccc12,7-Amino-4-methylcoumarin
CC=C(C)C(=O)O,Tiglic acid
CCCc1nc(C(C)(C)O)c(C(=O)O)n1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1,Olmesartan
OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO,Dodecaethylene glycol
Nc1ccccc1C(=O)O,Anthranilic acid
CN(C)C(=O)Oc1ccc[n+](C)c1,Pyridostigmine
O=S(=O)(O)c1ccccc1,Benzenesulfonic acid
COc1ccc(C2Sc3ccccc3N(CCN(C)C)C(=O)C2O)cc1,Desacetyl diltiazem
O=C(O)C(c1ccccc1Cl)N1CCc2sccc2C1,Clopidogrel carboxylic acid
Cn1cnc2[nH]c(=O)[nH]c(=O)c21,7-Methylxanthine
CN(CC(=O)O)C(=N)N,Creatine
CCCCCCC=CCCCCCCCC(=O)O,Palmitoleic acid
NC1CCCCC1,Cyclohexylamine
CC1NC(=O)NC1CCCCCC(=O)O,Desthiobiotin
O=C1CCCCN1,2-Piperidone
CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(=O)O,Bezafibrate
CN1C(=O)CC(C)(c2ccccc2)C1=O,Methsuximide
O=S(=O)(O)c1ccc2nc(-c3ccccc3)[nH]c2c1,Phenylbenzimidazole sulfonic acid
O=C1OC(C(O)CO)C(O)=C1O,Ascorbic acid
CCN(CC)CC(=O)Nc1c(C)cccc1C,Lidocaine
O=C1NS(=O)(=O)c2ccccc21,Saccharin
O=C(O)CCc1ccc(O)cc1,3-(4-Hydroxyphenyl)propionic acid
CCC=CCC(O)C(O)CCC(O)CCCCCCCC(=O)O,"(15Z)-9,12,13-Trihydroxy-15-octadecenoic acid"
CCOP(=O)(Sc1ccccc1)Sc1ccccc1,EDDP
c1ccc2c(c1)[nH]c1cnccc12,Norharman
CCCCCCC(=O)O,Heptanoic acid
NC(CCC(=O)O)C(=O)O,L-Glutamic acid
CN1CC(=O)NC1=O,N-Methylhydantoin
CC(=O)CCc1ccc(O)cc1,4-(4-Hydroxyphenyl)butan-2-one
CCCCCCCC(O)CC(=O)O,3-Hydroxydecanoic acid
c1ccc(-c2cnc[nH]2)cc1,4-Phenylimidazole
CN1CCC23c4c5ccc(O)c4OC2C(O)C=CC3C1C5,Morphine
O=C(O)c1c[nH]c2ccccc12,3-Indolylcarboxylic Acid
CC(C)C1NC(=O)C2CCCN2C1=O,XLUAWXQORJEMBD-UHFFFAOYSA-N
O=C(O)C=Cc1ccc(O)cc1,trans-4-hydroxycinnamic acid
O=S(=O)(c1ccc(O)cc1)c1ccc(O)cc1,"4,4'-Sulfonyldiphenol"
O=C(O)CCCCCCCCCCCC(=O)O,Tridecanedioic acid
CC(N)C(=O)N1CCCC1C(=O)O,L-Alanyl-L-proline
CC(C)N(C(=O)C(=O)O)c1ccccc1,Propachlor OXA
CC1(C)CC(O)CC(C)(C)N1CCO,"4-Hydroxy-1-(2-hydroxyethyl)-2,2,6,6-tetramethylpiperidine"
12 changes: 12 additions & 0 deletions streamlit/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import io
import base64
from rdkit.Chem import Draw


def image_from_mol(mol):
    """Render a molecule as a base64-encoded PNG data URI.

    Args:
        mol: Molecule object accepted by ``Draw.MolToImage``, or ``None``.
            NOTE(review): presumably ``None`` is what the molecule column
            holds for a SMILES that could not be parsed; verify against
            RDKit's PandasTools.

    Returns:
        A ``data:image/png;base64,...`` string, or ``None`` when *mol* is
        ``None`` so that one missing molecule yields an empty value instead
        of raising while the function is applied over a whole column.
    """
    if mol is None:
        return None
    # Draw into an in-memory buffer; the context manager releases it afterwards.
    with io.BytesIO() as buffer:
        Draw.MolToImage(mol).save(buffer, format="PNG")
        encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"