import pyodbc
import pandas as pd
import datetime as dt
import sqlalchemy
import urllib.parse
import matplotlib.pyplot as plt


import getpass
from SciServer import Authentication

myUserName = Authentication.getKeystoneUserWithToken(Authentication.getToken()).userName
passwd = getpass.getpass('Password for ' + myUserName + ': ')
user = "win\\" + myUserName

Password for pstable1: ········


#SQL Driver
driver="FreeTDS"
tds_ver="8.0"
# Database
host_ip="ESMPMDBPR4.WIN.AD.JHU.EDU" # Update this accordingly
db_port="1433"
db="CAMP_PMCoe_Projection" # Update this accordingly
# Create Connection String
conn_str=("DRIVER={};Server={};PORT={};DATABASE={};UID={};PWD={};TDS_VERSION={}"
.format(driver, host_ip, db_port, db, user, passwd, tds_ver)
)
# Create Engine
engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=' +
urllib.parse.quote(conn_str)
)
# Replace this Query with one that is intended for the "projection" database of interest
#query="SELECT * FROM INFORMATION_SCHEMA.TABLES where TABLE_NAME like '%asthma%'"
#data_frame = pandas.read_sql_query(query, engine)
#data_frame.head(100)


pd.read_sql_query("SELECT * FROM SYS.TABLES;", engine)


demo = pd.read_sql_query("Select * from patients",engine)


demo.head()


enc = pd.read_sql_query("Select * from encounters",engine)


enc.head()


encdemo = pd.merge(left=enc, right=demo, on='osler_id', how='inner')
encdemo.head()


encdemo.shape

(753484, 8)


encdemo.encounter_type.value_counts()

Office Visit               431080
Appointment                 97718
Hospital Encounter          86789
Visit Encounter             62740
Clinical Support            21626
Procedure visit             18256
Results Only                 8895
Orders Only                  8635
Anti-coag visit              7284
Provider Procedure           2987
Ancillary Orders             2272
Infusion                     1448
Scanned Document             1255
Abstract                      536
Follow-Up                     440
Telephone                     347
Documentation                 346
Routine Prenatal              318
Patient Email                 214
OB Nurse History              106
Initial Prenatal               53
Erroneous Encounter            44
Refill                         42
Home Care Visit                12
Evaluation                      9
MyChart Refill                  8
Allied Health                   7
Letter (Out)                    7
Postpartum Visit                5
Initial consult                 2
Home Visit                      2
Administratively Closed         1
Name: encounter_type, dtype: int64


encdemo = encdemo.sort_values(by=['osler_id','encounter_date'])
encdemo.reset_index(inplace=True)


visit_gap_df=pd.DataFrame([])

for i, row in encdemo.iterrows():
    if i > 0: 
        if row.osler_id == prev_row.osler_id and prev_row.encounter_type=='Hospital Encounter' and row.encounter_type=='Office Visit':
            visit_gap_df = visit_gap_df.append(pd.DataFrame.from_records([{'gap':(row.encounter_date-prev_row.encounter_date).days, 'osler_id':row.osler_id, 'date_of_birth':row.date_of_birth, 'encounter_date':row.encounter_date, 'gender':row.gender}]))
    prev_row = row


visit_gap_df.shape

(29914, 5)


visit_gap_df.head(10)


visit_gap_df['gap'].plot(kind='hist')
plt.show()


visit_gap_df['gap'].describe()

count    29914.000000
mean        24.172795
std         53.534386
min          0.000000
25%          0.000000
50%          6.000000
75%         22.000000
max        810.000000
Name: gap, dtype: float64


visit_gap_df.reset_index(inplace=True)
visit_gap_df.drop(labels='index', axis=1, inplace=True)
visit_gap_df.head()


visit_gap_df.drop(visit_gap_df[(visit_gap_df['gap']==0) | (visit_gap_df['gap']>180)].index, inplace=True)


visit_gap_df.shape

(20810, 5)


visit_gap_df['gap'].plot(kind='hist')
plt.show()


visit_gap_df.describe()


visit_gap_df['gap'].value_counts()

1      1689
2      1217
7      1116
6      1028
4      1007
5       989
3       982
8       739
14      598
9       559
13      481
11      457
10      456
12      448
15      405
20      342
21      340
19      305
16      302
17      282
18      259
28      246
22      239
27      206
35      174
26      174
25      161
24      158
23      158
42      156
       ... 
108       9
180       9
123       9
138       9
137       9
145       8
155       8
170       8
153       8
135       8
125       8
174       8
117       8
179       8
130       8
156       8
162       7
164       7
171       7
176       7
172       6
159       6
178       5
177       5
167       4
173       4
142       4
166       4
165       4
169       3
Name: gap, Length: 180, dtype: int64


from scipy.stats import kruskal


male_gap = visit_gap_df[visit_gap_df['gender']=='Male']['gap']
female_gap = visit_gap_df[visit_gap_df['gender']=='Female']['gap']
print(str(len(male_gap)) + ' Male Obs')
print(str(len(female_gap)) + ' Female Obs')

5906 Male Obs
14904 Female Obs


stat, p = kruskal(male_gap, female_gap)


print('H=%.3f, p=%.3f' % (stat, p))

H=3.100, p=0.078

	name	object_id	principal_id	schema_id	type	type_desc	create_date	modify_date	is_ms_shipped	...	lock_escalation_desc	is_filetable	is_memory_optimized	durability_desc	temporal_type_desc	history_table_id	is_remote_data_archive_enabled	is_external
0	patients	565577053	None	1	U	USER_TABLE	2019-05-08 12:57:55.070000	2019-05-13 13:55:38.813333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
1	encounters	581577110	None	1	U	USER_TABLE	2019-05-08 12:57:55.146666	2019-05-13 13:47:56.620000	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
2	labs	597577167	None	1	U	USER_TABLE	2019-05-08 12:57:55.193333	2019-05-13 13:48:07.196666	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
3	meds	629577281	None	1	U	USER_TABLE	2019-05-08 12:57:55.286666	2019-05-13 13:52:52.606666	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
4	problemlist	661577395	None	1	U	USER_TABLE	2019-05-08 13:00:25.690000	2019-05-13 13:53:52.313333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
5	procedures	677577452	None	1	U	USER_TABLE	2019-05-08 13:00:25.736666	2019-05-13 13:54:00.673333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
6	symptoms	709577566	None	1	U	USER_TABLE	2019-05-08 13:00:25.863333	2019-05-13 13:54:08.136666	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
7	vitals_BP	725577623	None	1	U	USER_TABLE	2019-05-08 13:05:47.126666	2019-05-13 13:54:20.460000	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
8	vitals_height	741577680	None	1	U	USER_TABLE	2019-05-08 13:05:47.173333	2019-05-13 13:54:31.063333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
9	vitals_pulse	757577737	None	1	U	USER_TABLE	2019-05-08 13:05:47.220000	2019-05-13 13:54:45.373333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
10	vitals_respiration	773577794	None	1	U	USER_TABLE	2019-05-08 13:05:47.266666	2019-05-13 13:54:55.776666	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
11	vitals_temperature	789577851	None	1	U	USER_TABLE	2019-05-08 13:05:47.313333	2019-05-13 13:55:08.500000	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
12	vitals_weight	805577908	None	1	U	USER_TABLE	2019-05-08 13:06:42.970000	2019-05-13 13:55:17.583333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False
13	sysdiagrams	1061578820	None	1	U	USER_TABLE	2019-05-09 07:12:50.733333	2019-05-09 07:12:50.733333	False	...	TABLE	False	False	SCHEMA_AND_DATA	NON_TEMPORAL_TABLE	None	False	False

	osler_id	date_of_birth	gender	race	ethnicity
0	5303550b-8ed2-42fd-885a-d32b308b05f3	2013-12-09	Male	Other	Not Hispanic or Latino
1	3ef665bc-8900-4a68-b086-bfbde88d2280	2015-05-27	Male	White or Caucasian	Not Hispanic or Latino
2	39026546-483b-4959-a889-82a4e3bbaa58	1977-03-14	Female	Black or African American	Not Hispanic or Latino
3	dcbd2d5c-2702-4469-988c-ea647963c49a	1970-08-31	Male	White or Caucasian	Not Hispanic or Latino
4	ab7e569a-2224-4540-8054-bcbef6ea0484	2002-02-18	Male	Black or African American	Not Hispanic or Latino

	osler_id	encounter_id	encounter_type	encounter_date
0	5303550b-8ed2-42fd-885a-d32b308b05f3	17938	Office Visit	2015-10-10
1	5303550b-8ed2-42fd-885a-d32b308b05f3	142706	Office Visit	2016-01-24
2	5303550b-8ed2-42fd-885a-d32b308b05f3	571465	Office Visit	2017-03-19
3	5303550b-8ed2-42fd-885a-d32b308b05f3	432470	Office Visit	2016-10-22
4	5303550b-8ed2-42fd-885a-d32b308b05f3	410795	Office Visit	2016-10-01

	osler_id	encounter_id	encounter_type	encounter_date	date_of_birth	gender	race	ethnicity
0	5303550b-8ed2-42fd-885a-d32b308b05f3	17938	Office Visit	2015-10-10	2013-12-09	Male	Other	Not Hispanic or Latino
1	5303550b-8ed2-42fd-885a-d32b308b05f3	142706	Office Visit	2016-01-24	2013-12-09	Male	Other	Not Hispanic or Latino
2	5303550b-8ed2-42fd-885a-d32b308b05f3	571465	Office Visit	2017-03-19	2013-12-09	Male	Other	Not Hispanic or Latino
3	5303550b-8ed2-42fd-885a-d32b308b05f3	432470	Office Visit	2016-10-22	2013-12-09	Male	Other	Not Hispanic or Latino
4	5303550b-8ed2-42fd-885a-d32b308b05f3	410795	Office Visit	2016-10-01	2013-12-09	Male	Other	Not Hispanic or Latino

date_of_birth	encounter_date	gap	gender	osler_id
1983-11-30	2016-09-17	0	Female	0000c828-f84f-485d-9fa9-66146905ddb3
1983-11-30	2016-11-22	10	Female	0000c828-f84f-485d-9fa9-66146905ddb3
1946-11-24	2016-11-10	7	Female	000687c9-13f7-4da3-84f7-956ec7b08cfe
1946-11-24	2017-05-05	24	Female	000687c9-13f7-4da3-84f7-956ec7b08cfe
1983-12-26	2017-04-29	0	Male	0009ac4e-1c70-40cf-910f-a6d2b2339c29
1983-12-26	2017-09-23	8	Male	0009ac4e-1c70-40cf-910f-a6d2b2339c29
1983-12-26	2017-11-23	8	Male	0009ac4e-1c70-40cf-910f-a6d2b2339c29
1983-12-26	2018-03-03	0	Male	0009ac4e-1c70-40cf-910f-a6d2b2339c29
1983-12-26	2018-03-09	2	Male	0009ac4e-1c70-40cf-910f-a6d2b2339c29
1948-06-30	2016-08-05	28	Female	0009b201-21f9-48b8-94c0-853d96914ffa

Statistical testing EMR Asthma example¶

Learning Objectives¶

Table of contents¶

Libraries, Authentication, Database Connection¶

ANOVA : Are Male and Female Post-Discharge Appointment Timings the Same?¶

Identifying and computing relationship between the `'Office Visit'` that follows a `'Hospital Encounter'` `encounter_type`¶

	gap
count	20810.000000
mean	24.717347
std	32.469287
min	1.000000
25%	5.000000
50%	12.000000
75%	30.000000
max	180.000000

Statistical testing EMR Asthma example¶

Learning Objectives¶

Table of contents¶

Libraries, Authentication, Database Connection¶

ANOVA : Are Male and Female Post-Discharge Appointment Timings the Same?¶

Identifying and computing relationship between the 'Office Visit' that follows a 'Hospital Encounter' encounter_type¶

Identifying and computing relationship between the `'Office Visit'` that follows a `'Hospital Encounter'` `encounter_type`¶