#Libraries needed
import sqlalchemy
import urllib.parse
import pandas as pd
import getpass
from SciServer import Authentication
from dateutil.relativedelta import relativedelta
import datetime
myUserName = Authentication.getKeystoneUserWithToken(Authentication.getToken()).userName
passwd = getpass.getpass('Password for ' + myUserName + ': ')
user = "win\\" + myUserName


#SQL Driver
driver="FreeTDS"
tds_ver="8.0"
# Database
host_ip="ESMPMDBPR4.WIN.AD.JHU.EDU" # Update this accordingly
db_port="1433"
db="CAMP_PMCoe_Projection" # Update this accordingly
# Create Connection String
conn_str=("DRIVER={};Server={};PORT={};DATABASE={};UID={};PWD={};TDS_VERSION={}"
.format(driver, host_ip, db_port, db, user, passwd, tds_ver)
)
# Create Engine
engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=' +
urllib.parse.quote(conn_str)
)


#View available tables
query = "SELECT name FROM SYS.tables"
df = pd.read_sql_query(query, engine)
df


#Put patient table into a dataframe
query="SELECT top 1000 * FROM patients ORDER BY osler_id"
pat = pd.read_sql_query(query, engine)
pat.head()


#Put vitals table into a dataframe
query="SELECT top 10000 * FROM vitals_temperature ORDER BY osler_id"
vitals = pd.read_sql_query(query, engine)
vitals.head()


#Put symptom table into a dataframe
query="SELECT top 10000 * FROM symptoms ORDER BY osler_id"
symp = pd.read_sql_query(query, engine)
symp.head()


#merge patient and encounter table together, based on patient-level identifier osler_id
df = pat.merge(vitals, on = "osler_id")
df.head()


#select only cols of interest and remove duplicate rows
df = df[["osler_id", "date_of_birth", "encounter_id", "encounter_type", "admission_date", "discharge_date"]].drop_duplicates().reset_index(drop=True)
df.head()


#Calculate age at encounter and add it as a column
df["age_at_encounter"] = ((pd.DatetimeIndex(df['admission_date']) - pd.DatetimeIndex(df['date_of_birth'])).days/365.24251).astype("int")
df.head()


#Filter out encounters where patient was under 18 years old and where the encounter was a hosp enc
df = df[(df["age_at_encounter"] >= 18) & (df["encounter_type"].str.contains('Hosp'))]
df.head()


#Sort data frame
df = df.sort_values(['osler_id','admission_date']) 
df = df.reset_index(drop=True)
df.head()


#30-day readmit
df['Readmit30'] = df.groupby('osler_id').apply(lambda x: (((x['admission_date'].shift(-1)-x['discharge_date']).dt.days.shift(1).le(30)) ).astype(int)).values
df.head(10)


#Add in symptom data, joining on encounter-level identifier
df = df.merge(symp, on = "encounter_id", suffixes = (None, "_2")).drop_duplicates().reset_index(drop=True)
#Clean up dataframe
df = df.drop("osler_id_2", axis = 1)
df.head(20)

	name
0	patients
1	encounters
2	labs
3	meds
4	problemlist
5	procedures
6	symptoms
7	vitals_BP
8	vitals_height
9	vitals_pulse
10	vitals_respiration
11	vitals_temperature
12	vitals_weight
13	sysdiagrams

	osler_id	date_of_birth	gender	race	ethnicity
0	0000c828-f84f-485d-9fa9-66146905ddb3	1983-11-30	Female	Black or African American	Not Hispanic or Latino
1	0001f556-4601-4ff6-b4e0-b7d34cbf5a0d	1999-08-11	Male	Black or African American	Not Hispanic or Latino
2	00025a09-d936-4786-a5de-52c98bc0bd00	1960-08-30	Female	White or Caucasian	Not Hispanic or Latino
3	0003a6f6-cea5-4389-be58-09c4f80206e6	1973-10-14	Female	White or Caucasian	Not Hispanic or Latino
4	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	Female	White or Caucasian	Not Hispanic or Latino

	osler_id	encounter_id	encounter_type	admission_date	discharge_date	temperature	temperature_date
0	0000c828-f84f-485d-9fa9-66146905ddb3	590455	Hospital Encounter	2017-04-11 16:35:00	2017-04-11 19:26:00	97.5	2017-04-11 16:30:00
1	000687c9-13f7-4da3-84f7-956ec7b08cfe	8083	Office Visit	2016-07-29 00:00:00	NaT	96.9	2016-07-29 09:39:00
2	000687c9-13f7-4da3-84f7-956ec7b08cfe	680554	Office Visit	2017-08-04 00:00:00	NaT	98.6	2017-08-04 10:04:00
3	000687c9-13f7-4da3-84f7-956ec7b08cfe	675067	Office Visit	2017-06-07 00:00:00	NaT	97.4	2017-06-07 15:09:00
4	000687c9-13f7-4da3-84f7-956ec7b08cfe	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	99.0	2017-05-27 08:00:00

	osler_id	encounter_id	diagnosis_code_icd10	diagnosis_code_icd9	diagnosis_name
0	0000c828-f84f-485d-9fa9-66146905ddb3	57265	J45.41	493.92	Asthma, moderate persistent, with acute exacer...
1	0000c828-f84f-485d-9fa9-66146905ddb3	57265	Z01.419	V72.31	Encounter for gynecological examination
2	0000c828-f84f-485d-9fa9-66146905ddb3	57265	N64.52	611.79	Breast discharge
3	0000c828-f84f-485d-9fa9-66146905ddb3	412810	N94.10	625.0	Dyspareunia, female
4	0000c828-f84f-485d-9fa9-66146905ddb3	399639	M62.838	728.85	Levator spasm

	osler_id	date_of_birth	gender	race	ethnicity	encounter_id	encounter_type	admission_date	discharge_date	temperature	temperature_date
0	0000c828-f84f-485d-9fa9-66146905ddb3	1983-11-30	Female	Black or African American	Not Hispanic or Latino	590455	Hospital Encounter	2017-04-11 16:35:00	2017-04-11 19:26:00	97.5	2017-04-11 16:30:00
1	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	Female	White or Caucasian	Not Hispanic or Latino	8083	Office Visit	2016-07-29 00:00:00	NaT	96.9	2016-07-29 09:39:00
2	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	Female	White or Caucasian	Not Hispanic or Latino	680554	Office Visit	2017-08-04 00:00:00	NaT	98.6	2017-08-04 10:04:00
3	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	Female	White or Caucasian	Not Hispanic or Latino	675067	Office Visit	2017-06-07 00:00:00	NaT	97.4	2017-06-07 15:09:00
4	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	Female	White or Caucasian	Not Hispanic or Latino	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	99.0	2017-05-27 08:00:00

Python Data Cleaning Example¶

First Steps¶

Joining Tables¶

30-Day Readmission¶

	osler_id	date_of_birth	encounter_id	encounter_type	admission_date	discharge_date	age_at_encounter	Readmit30
0	0000c828-f84f-485d-9fa9-66146905ddb3	1983-11-30	590455	Hospital Encounter	2017-04-11 16:35:00	2017-04-11 19:26:00	33	0
1	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	70	0
2	0008cc24-ac52-4c87-8b9d-e46ff29a1760	1965-12-19	444663	Hospital Encounter	2017-01-23 15:41:00	2017-01-23 23:59:00	51	0
3	0008cc24-ac52-4c87-8b9d-e46ff29a1760	1965-12-19	454011	Hospital Encounter	2017-02-12 10:11:00	2017-02-12 10:24:00	51	1
4	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	164050	Hospital Encounter	2016-03-26 21:26:00	2016-03-29 18:48:00	27	0
5	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	509425	Hospital Encounter	2017-02-19 20:28:00	2017-02-20 02:00:00	28	0
6	0009ac4e-1c70-40cf-910f-a6d2b2339c29	1983-12-26	575151	Hospital Encounter	2017-11-15 09:06:00	2017-11-15 23:59:00	33	0
7	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	140015	Hospital Encounter	2016-05-08 10:55:00	2016-05-08 23:59:00	67	0
8	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	189591	Hospital Encounter	2016-06-02 15:22:00	2016-06-02 23:59:00	67	1
9	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	218953	Hospital Encounter	2016-07-08 12:39:00	2016-07-08 23:59:00	68	0

	osler_id	date_of_birth	encounter_id	encounter_type	admission_date	discharge_date	age_at_encounter	Readmit30	diagnosis_code_icd10	diagnosis_code_icd9	diagnosis_name
0	0000c828-f84f-485d-9fa9-66146905ddb3	1983-11-30	590455	Hospital Encounter	2017-04-11 16:35:00	2017-04-11 19:26:00	33	0	N93.9	623.8	Vaginal bleeding
1	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	70	0	G93.9	348.9	Brain mass
2	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	70	0	E53.8	266.2	Vitamin B12 deficiency
3	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	70	0	D33.2	225.0	Benign neoplasm of brain, unspecified brain re...
4	000687c9-13f7-4da3-84f7-956ec7b08cfe	1946-11-24	620459	Hospital Encounter	2017-05-24 05:41:00	2017-06-01 13:20:00	70	0	D32.9	225.2	Meningioma
5	0008cc24-ac52-4c87-8b9d-e46ff29a1760	1965-12-19	444663	Hospital Encounter	2017-01-23 15:41:00	2017-01-23 23:59:00	51	0	M46.96	721.3	Lumbar facet arthropathy
6	0008cc24-ac52-4c87-8b9d-e46ff29a1760	1965-12-19	454011	Hospital Encounter	2017-02-12 10:11:00	2017-02-12 10:24:00	51	1	M46.96	721.3	Lumbar facet arthropathy
7	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	164050	Hospital Encounter	2016-03-26 21:26:00	2016-03-29 18:48:00	27	0	J45.901	493.92	Asthma with acute exacerbation, unspecified as...
8	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	509425	Hospital Encounter	2017-02-19 20:28:00	2017-02-20 02:00:00	28	0	R06.02	786.05	Shortness of breath
9	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	509425	Hospital Encounter	2017-02-19 20:28:00	2017-02-20 02:00:00	28	0	R07.9	786.50	Chest pain, unspecified type
10	00091b8b-beec-49cb-afcd-4e3b2c36ccbf	1988-10-23	509425	Hospital Encounter	2017-02-19 20:28:00	2017-02-20 02:00:00	28	0	R05	786.2	Cough
11	0009ac4e-1c70-40cf-910f-a6d2b2339c29	1983-12-26	575151	Hospital Encounter	2017-11-15 09:06:00	2017-11-15 23:59:00	33	0	M51.9	722.93	Lumbar disc disease
12	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	140015	Hospital Encounter	2016-05-08 10:55:00	2016-05-08 23:59:00	67	0	C55	179	Uterine leiomyosarcoma
13	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	140015	Hospital Encounter	2016-05-08 10:55:00	2016-05-08 23:59:00	67	0	C54.9	182.0	Uterine sarcoma
14	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	189591	Hospital Encounter	2016-06-02 15:22:00	2016-06-02 23:59:00	67	1	C55	179	Uterine leiomyosarcoma
15	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	189591	Hospital Encounter	2016-06-02 15:22:00	2016-06-02 23:59:00	67	1	C54.9	182.0	Uterine sarcoma
16	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	218953	Hospital Encounter	2016-07-08 12:39:00	2016-07-08 23:59:00	68	0	C55	179	Uterine leiomyosarcoma
17	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	218953	Hospital Encounter	2016-07-08 12:39:00	2016-07-08 23:59:00	68	0	C54.9	182.0	Uterine sarcoma
18	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	272630	Hospital Encounter	2016-08-17 13:19:00	2016-08-23 15:02:00	68	0	C54.9	182.0	Uterine sarcoma
19	0009b201-21f9-48b8-94c0-853d96914ffa	1948-06-30	272630	Hospital Encounter	2016-08-17 13:19:00	2016-08-23 15:02:00	68	0	K56.609	560.9	SBO (small bowel obstruction)