library(readr) # Read file inputs
library(DBI) # Database connection
library(kableExtra) # Table formatting
library(ggplot2) # Graphing
library(plotly) # Make interactive graphs
library(magrittr) # Pipes %>%
library(dplyr) # Data manipulation
library(ggraph) # More graphing
library(igraph) # Even more graphing
<- read_delim("data/AttributionData.csv", delim = "|")
Attribution <- read_delim("data/EncounterData.csv", delim = "|")
Encounter <- read_delim("data/PersonData.csv", delim = "|", skip_empty_rows = TRUE)
Person <- Person[-(nrow(Person)),] Person
<- "data/network_db.sqlite" network_db_file
<- dbConnect(RSQLite::SQLite(), network_db_file) con
dbWriteTable(con, "Attribution", Attribution[,], overwrite = TRUE, row.names = FALSE)
person_id | client_DOB | Medicaid_Begin | Medicaid_End | AttributionDate | PrevAttrib | orig_site_id |
---|---|---|---|---|---|---|
10EDA3E5-E0AD-4BF3-A8C3-5477A9184B7F | Sep 5 1966 12:00AM | 20170201 | 22991231 | 20190301 | No - New to PCMP | 15 |
46D8B92E-870E-49BF-9063-C5609A72702A | Mar 15 1988 12:00AM | 20150801 | 22991231 | 20190301 | Yes - Continuous | 15 |
24C40ACB-C764-4EB1-B5AB-EC154906B1CB | May 13 1985 12:00AM | NULL | NULL | 20190301 | Yes - Continuous | 15 |
60A2FD9B-05FD-43ED-B1BC-2C910455C254 | Mar 19 1957 12:00AM | 20151101 | 22991231 | 20190301 | Yes - Continuous | 13 |
F3C34F73-7975-4335-B73D-1F3ACF5FE788 | May 4 1962 12:00AM | 20160301 | 22991231 | 20190301 | Yes - Continuous | 15 |
dbWriteTable(con, "Encounter", Encounter[,], overwrite = TRUE, row.names = FALSE)
enc_number | enc_timestamp | enc_billable_ind | enc_orig_site_id | enc_qualifying_ind | enc_service_line_unscrubbed | enc_pat_drvs_number | client_service_line_id | service_line_mapped |
---|---|---|---|---|---|---|---|---|
3544519 | 2015-10-20 19:12:32 | N | 2 | Y | Case Management | 168775 | 325 | Behavioral Health |
3544709 | 2015-10-20 21:49:43 | N | 1 | N | MD_Family Practitioner | 29116 | 63 | Primary Care |
3545122 | 1900-10-19 22:56:11 | N | 1 | N | LPC_Mental Health Practitioner | 51591 | 62 | Behavioral Health |
3545157 | 2015-10-20 20:54:41 | N | 2 | N | Other | 135474 | 330 | Specialty |
3545517 | 2015-10-19 10:31:33 | N | 1 | N | UDS_Education Specialists | 59088 | 73 | Behavioral Health |
3545559 | 2015-10-18 21:47:00 | N | 1 | N | _ | 31203 | 38 | Ignore |
dbWriteTable(con, "Person", Person[,], overwrite = TRUE, row.names = FALSE)
person_id | pat_drvs_number | pat_date_of_birth | pat_homelessness_status | pat_migrant_status | pat_veteran_status | pat_create_timestamp | pat_modify_timestamp | pat_orig_site_id |
---|---|---|---|---|---|---|---|---|
637AA380-3E1E-4F10-A4E8-EE96F6979C95 | 90 | 1990-02-05 | Not Homeless | Not A Farm Worker | N | 2010-04-26 13:58:43 | 2015-05-28 11:50:06 | 1 |
8F63BF05-E0A1-4617-BE5E-6D0D0EF9795C | 1609 | 1990-03-24 | Not Homeless | Not A Farm Worker | N | 2011-12-19 10:19:12 | 2015-11-30 12:12:46 | 1 |
4362171F-92B6-4D6B-87F7-880FD2DA67DA | 4682 | 1972-07-05 | Not Homeless | Not A Farm Worker | N | 2011-01-28 11:06:40 | 2015-06-25 13:52:37 | 1 |
661B4E44-3B6E-49F1-9A49-7A217ADEC658 | 8451 | 1992-03-18 | Not Homeless | Not A Farm Worker | N | 2012-09-11 08:14:40 | 2019-04-16 10:13:20 | 1 |
A8A108BB-6731-4B81-93AF-1F4F7C3D9923 | 10383 | 1991-05-01 | NULL | NULL | N | 2010-04-26 13:58:43 | 2010-04-26 12:01:40 | 1 |
9C234B3E-3B87-4C8A-AF26-1C9E265D4E25 | 15559 | 1990-07-03 | Not Homeless | Not A Farm Worker | N | 2011-07-26 09:27:46 | 2019-04-16 10:24:39 | 1 |
WITH enc_ranked AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_number,
e.enc_service_line_unscrubbed,
e.service_line_mapped,
e.enc_orig_site_id, 'unixepoch') as enc_timestamp,
datetime(e.enc_timestamp, ROW_NUMBER() OVER(PARTITION BY p.person_id ORDER BY enc_timestamp DESC) AS rank
FROM
Encounter eINNER JOIN Person p
on e.enc_pat_drvs_number = p.pat_drvs_number)
SELECT
person_id,
enc_number,
enc_timestamp,
enc_orig_site_id,
enc_service_line_unscrubbed,
service_line_mappedFROM
enc_ranked WHERE rank = 1
person_id | enc_number | enc_timestamp | enc_orig_site_id | enc_service_line_unscrubbed | service_line_mapped |
---|---|---|---|---|---|
00225091-B854-4B6D-8520-308C20E0FE2E | 32354540 | 2019-07-23 13:40:00 | 11 | Family Medicine | Primary Care |
006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 31211784 | 2019-05-13 13:20:00 | 11 | Family Medicine | Primary Care |
008394B7-47A5-4206-A641-D7116DB12B3B | 32579131 | 2019-08-05 09:30:00 | 11 | Dental General Practice | Dental |
00BA341D-528C-465F-8C59-113DE481CBD1 | 28327946 | 2018-12-07 10:20:00 | 11 | Family Medicine | Primary Care |
015185D9-24F6-415C-860F-A7E5DA5DBC23 | 32528909 | 2019-08-01 09:20:00 | 11 | Family Medicine | Primary Care |
# Summarize data by health center and specialty
<- q1_output %>%
q1_summary group_by(enc_orig_site_id, service_line_mapped) %>%
summarize(n = n()) %>%
ungroup()
# Create stacked bar chart
<- ggplot(q1_summary, aes(x = enc_orig_site_id, y = n, fill = service_line_mapped)) +
p geom_col(position = "stack") +
scale_fill_discrete() +
scale_x_discrete(labels = seq_along(unique(q1_summary$enc_orig_site_id))) +
labs(x = "Health Center", y = "Count", fill = "Specialty") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplotly(p)
WITH enc_site AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_orig_site_idFROM
Encounter eINNER JOIN Person p
ON e.enc_pat_drvs_number = p.pat_drvs_number)
SELECT person_id, COUNT(DISTINCT(enc_orig_site_id)) AS centers_visited
FROM
enc_siteGROUP BY person_id
HAVING centers_visited > 1
person_id | centers_visited |
---|---|
00225091-B854-4B6D-8520-308C20E0FE2E | 2 |
006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 3 |
015185D9-24F6-415C-860F-A7E5DA5DBC23 | 3 |
01A964D5-CF94-4C57-B13C-17912E50CBBC | 2 |
01D8C426-1C75-449D-A9BB-291BB67BD897 | 2 |
021F6E0B-D736-44CB-BFD9-7CDDEC3ECC25 | 2 |
# Query data from database to find edges in network
<- dbGetQuery(con, "WITH enc_site AS(
edges SELECT
e.enc_pat_drvs_number,
e.enc_orig_site_id
FROM Encounter e)
SELECT e1.enc_orig_site_id AS site1, e2.enc_orig_site_id AS site2, COUNT(DISTINCT e1.enc_pat_drvs_number) AS weight
FROM enc_site e1
JOIN enc_site e2 ON e1.enc_pat_drvs_number = e2.enc_pat_drvs_number AND e1.enc_orig_site_id < e2.enc_orig_site_id
GROUP BY e1.enc_orig_site_id, e2.enc_orig_site_id")
# Create igraph object
<- graph_from_data_frame(edges, directed = FALSE)
graph
# Add node attribute for health center names
<- dbGetQuery(con, "SELECT enc_orig_site_id as name FROM Encounter")
health_centers V(graph)$name <- health_centers[match(V(graph)$name, health_centers$name), "name"]
# Convert igraph object to ggplot object using ggraph
<- ggraph(graph, layout = "kk") +
g geom_edge_link(aes(width = weight)) +
geom_node_point(linewidth = 5, color = "blue") +
geom_node_text(aes(label = name), repel = TRUE) +
theme_void()
# Plot the graph
g
WITH enc_ranked AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_number,
e.enc_orig_site_id, 'unixepoch') as enc_timestamp,
datetime(e.enc_timestamp, ROW_NUMBER() OVER(PARTITION BY p.person_id ORDER BY enc_timestamp DESC) AS rank
FROM
Encounter eINNER JOIN Person p
ON e.enc_pat_drvs_number = p.pat_drvs_number
), AS(
recent_visit SELECT person_id,
enc_number,
enc_orig_site_id FROM
enc_rankedWHERE rank = 1)
SELECT
v.person_id,
v.enc_number,
v.enc_orig_site_id,
a.orig_site_id,CASE WHEN v.enc_orig_site_id = a.orig_site_id
THEN 'YES'
ELSE 'NO'
END AS enc_atrb_orig_site
FROM
recent_visit vLEFT JOIN Attribution a
ON v.person_id = a.person_id
GROUP BY v.person_id
person_id | enc_number | enc_orig_site_id | orig_site_id | enc_atrb_orig_site |
---|---|---|---|---|
00225091-B854-4B6D-8520-308C20E0FE2E | 32354540 | 11 | 7 | NO |
006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 31211784 | 11 | 2 | NO |
008394B7-47A5-4206-A641-D7116DB12B3B | 32579131 | 11 | 11 | YES |
00BA341D-528C-465F-8C59-113DE481CBD1 | 28327946 | 11 | 11 | YES |
SELECT
as specialty,
e.enc_service_line_unscrubbed COUNT(DISTINCT e.enc_pat_drvs_number) as unique_patients
FROM
Encounter eWHERE
<> 'NULL' AND
e.enc_service_line_unscrubbed <> 'Other'
e.enc_service_line_unscrubbed GROUP BY enc_service_line_unscrubbed
ORDER BY unique_patients DESC
# Generate pie chart
<- q4_output[1:15,]
pie_data
# Create plotly interactive pie chart
plot_ly(data = pie_data, labels = ~specialty, values = ~unique_patients, type = "pie",
hoverinfo = "label+value", textinfo = "percent", textposition = "inside",
insidetextfont = list(size = 14), marker = list(colors = rainbow(length(unique(pie_data$specialty))),
line = list(color = "#FFFFFF", width = 2))) %>%
layout(title = "Unique Patients by Specialty",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
showlegend = TRUE,
legend = list(orientation = "v", x = 1.1, y = 0.5))