library(readr) # Read file inputs
library(DBI) # Database connection
library(kableExtra) # Table formatting
library(ggplot2) # Graphing
library(plotly) # Make interactive graphs
library(magrittr) # Pipes %>%
library(dplyr) # Data manipulation
library(ggraph) # More graphing
library(igraph) # Even more graphingAttribution <- read_delim("data/AttributionData.csv", delim = "|")
Encounter <- read_delim("data/EncounterData.csv", delim = "|")
Person <- read_delim("data/PersonData.csv", delim = "|", skip_empty_rows = TRUE)
Person <- Person[-(nrow(Person)),]network_db_file <- "data/network_db.sqlite"con <- dbConnect(RSQLite::SQLite(), network_db_file)dbWriteTable(con, "Attribution", Attribution[,], overwrite = TRUE, row.names = FALSE)| person_id | client_DOB | Medicaid_Begin | Medicaid_End | AttributionDate | PrevAttrib | orig_site_id |
|---|---|---|---|---|---|---|
| 10EDA3E5-E0AD-4BF3-A8C3-5477A9184B7F | Sep 5 1966 12:00AM | 20170201 | 22991231 | 20190301 | No - New to PCMP | 15 |
| 46D8B92E-870E-49BF-9063-C5609A72702A | Mar 15 1988 12:00AM | 20150801 | 22991231 | 20190301 | Yes - Continuous | 15 |
| 24C40ACB-C764-4EB1-B5AB-EC154906B1CB | May 13 1985 12:00AM | NULL | NULL | 20190301 | Yes - Continuous | 15 |
| 60A2FD9B-05FD-43ED-B1BC-2C910455C254 | Mar 19 1957 12:00AM | 20151101 | 22991231 | 20190301 | Yes - Continuous | 13 |
| F3C34F73-7975-4335-B73D-1F3ACF5FE788 | May 4 1962 12:00AM | 20160301 | 22991231 | 20190301 | Yes - Continuous | 15 |
dbWriteTable(con, "Encounter", Encounter[,], overwrite = TRUE, row.names = FALSE)| enc_number | enc_timestamp | enc_billable_ind | enc_orig_site_id | enc_qualifying_ind | enc_service_line_unscrubbed | enc_pat_drvs_number | client_service_line_id | service_line_mapped |
|---|---|---|---|---|---|---|---|---|
| 3544519 | 2015-10-20 19:12:32 | N | 2 | Y | Case Management | 168775 | 325 | Behavioral Health |
| 3544709 | 2015-10-20 21:49:43 | N | 1 | N | MD_Family Practitioner | 29116 | 63 | Primary Care |
| 3545122 | 1900-10-19 22:56:11 | N | 1 | N | LPC_Mental Health Practitioner | 51591 | 62 | Behavioral Health |
| 3545157 | 2015-10-20 20:54:41 | N | 2 | N | Other | 135474 | 330 | Specialty |
| 3545517 | 2015-10-19 10:31:33 | N | 1 | N | UDS_Education Specialists | 59088 | 73 | Behavioral Health |
| 3545559 | 2015-10-18 21:47:00 | N | 1 | N | _ | 31203 | 38 | Ignore |
dbWriteTable(con, "Person", Person[,], overwrite = TRUE, row.names = FALSE)| person_id | pat_drvs_number | pat_date_of_birth | pat_homelessness_status | pat_migrant_status | pat_veteran_status | pat_create_timestamp | pat_modify_timestamp | pat_orig_site_id |
|---|---|---|---|---|---|---|---|---|
| 637AA380-3E1E-4F10-A4E8-EE96F6979C95 | 90 | 1990-02-05 | Not Homeless | Not A Farm Worker | N | 2010-04-26 13:58:43 | 2015-05-28 11:50:06 | 1 |
| 8F63BF05-E0A1-4617-BE5E-6D0D0EF9795C | 1609 | 1990-03-24 | Not Homeless | Not A Farm Worker | N | 2011-12-19 10:19:12 | 2015-11-30 12:12:46 | 1 |
| 4362171F-92B6-4D6B-87F7-880FD2DA67DA | 4682 | 1972-07-05 | Not Homeless | Not A Farm Worker | N | 2011-01-28 11:06:40 | 2015-06-25 13:52:37 | 1 |
| 661B4E44-3B6E-49F1-9A49-7A217ADEC658 | 8451 | 1992-03-18 | Not Homeless | Not A Farm Worker | N | 2012-09-11 08:14:40 | 2019-04-16 10:13:20 | 1 |
| A8A108BB-6731-4B81-93AF-1F4F7C3D9923 | 10383 | 1991-05-01 | NULL | NULL | N | 2010-04-26 13:58:43 | 2010-04-26 12:01:40 | 1 |
| 9C234B3E-3B87-4C8A-AF26-1C9E265D4E25 | 15559 | 1990-07-03 | Not Homeless | Not A Farm Worker | N | 2011-07-26 09:27:46 | 2019-04-16 10:24:39 | 1 |
WITH enc_ranked AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_number,
e.enc_service_line_unscrubbed,
e.service_line_mapped,
e.enc_orig_site_id,
datetime(e.enc_timestamp, 'unixepoch') as enc_timestamp,
ROW_NUMBER() OVER(PARTITION BY p.person_id ORDER BY enc_timestamp DESC) AS rank
FROM
Encounter e
INNER JOIN Person p
on e.enc_pat_drvs_number = p.pat_drvs_number)
SELECT
person_id,
enc_number,
enc_timestamp,
enc_orig_site_id,
enc_service_line_unscrubbed,
service_line_mapped
FROM
enc_ranked
WHERE rank = 1| person_id | enc_number | enc_timestamp | enc_orig_site_id | enc_service_line_unscrubbed | service_line_mapped |
|---|---|---|---|---|---|
| 00225091-B854-4B6D-8520-308C20E0FE2E | 32354540 | 2019-07-23 13:40:00 | 11 | Family Medicine | Primary Care |
| 006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 31211784 | 2019-05-13 13:20:00 | 11 | Family Medicine | Primary Care |
| 008394B7-47A5-4206-A641-D7116DB12B3B | 32579131 | 2019-08-05 09:30:00 | 11 | Dental General Practice | Dental |
| 00BA341D-528C-465F-8C59-113DE481CBD1 | 28327946 | 2018-12-07 10:20:00 | 11 | Family Medicine | Primary Care |
| 015185D9-24F6-415C-860F-A7E5DA5DBC23 | 32528909 | 2019-08-01 09:20:00 | 11 | Family Medicine | Primary Care |
# Summarize data by health center and specialty
q1_summary <- q1_output %>%
group_by(enc_orig_site_id, service_line_mapped) %>%
summarize(n = n()) %>%
ungroup()
# Create stacked bar chart
p <- ggplot(q1_summary, aes(x = enc_orig_site_id, y = n, fill = service_line_mapped)) +
geom_col(position = "stack") +
scale_fill_discrete() +
scale_x_discrete(labels = seq_along(unique(q1_summary$enc_orig_site_id))) +
labs(x = "Health Center", y = "Count", fill = "Specialty") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplotly(p)WITH enc_site AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_orig_site_id
FROM
Encounter e
INNER JOIN Person p
ON e.enc_pat_drvs_number = p.pat_drvs_number)
SELECT person_id, COUNT(DISTINCT(enc_orig_site_id)) AS centers_visited
FROM
enc_site
GROUP BY person_id
HAVING centers_visited > 1| person_id | centers_visited |
|---|---|
| 00225091-B854-4B6D-8520-308C20E0FE2E | 2 |
| 006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 3 |
| 015185D9-24F6-415C-860F-A7E5DA5DBC23 | 3 |
| 01A964D5-CF94-4C57-B13C-17912E50CBBC | 2 |
| 01D8C426-1C75-449D-A9BB-291BB67BD897 | 2 |
| 021F6E0B-D736-44CB-BFD9-7CDDEC3ECC25 | 2 |
# Query data from database to find edges in network
edges <- dbGetQuery(con, "WITH enc_site AS(
SELECT
e.enc_pat_drvs_number,
e.enc_orig_site_id
FROM Encounter e)
SELECT e1.enc_orig_site_id AS site1, e2.enc_orig_site_id AS site2, COUNT(DISTINCT e1.enc_pat_drvs_number) AS weight
FROM enc_site e1
JOIN enc_site e2 ON e1.enc_pat_drvs_number = e2.enc_pat_drvs_number AND e1.enc_orig_site_id < e2.enc_orig_site_id
GROUP BY e1.enc_orig_site_id, e2.enc_orig_site_id")
# Create igraph object
graph <- graph_from_data_frame(edges, directed = FALSE)
# Add node attribute for health center names
health_centers <- dbGetQuery(con, "SELECT enc_orig_site_id as name FROM Encounter")
V(graph)$name <- health_centers[match(V(graph)$name, health_centers$name), "name"]
# Convert igraph object to ggplot object using ggraph
g <- ggraph(graph, layout = "kk") +
geom_edge_link(aes(width = weight)) +
geom_node_point(linewidth = 5, color = "blue") +
geom_node_text(aes(label = name), repel = TRUE) +
theme_void()
# Plot the graph
gWITH enc_ranked AS(
SELECT
p.person_id,
p.pat_drvs_number,
e.enc_pat_drvs_number,
e.enc_number,
e.enc_orig_site_id,
datetime(e.enc_timestamp, 'unixepoch') as enc_timestamp,
ROW_NUMBER() OVER(PARTITION BY p.person_id ORDER BY enc_timestamp DESC) AS rank
FROM
Encounter e
INNER JOIN Person p
ON e.enc_pat_drvs_number = p.pat_drvs_number
),
recent_visit AS(
SELECT person_id,
enc_number,
enc_orig_site_id
FROM
enc_ranked
WHERE rank = 1)
SELECT
v.person_id,
v.enc_number,
v.enc_orig_site_id,
a.orig_site_id,
CASE WHEN v.enc_orig_site_id = a.orig_site_id
THEN 'YES'
ELSE 'NO'
END AS enc_atrb_orig_site
FROM
recent_visit v
LEFT JOIN Attribution a
ON v.person_id = a.person_id
GROUP BY v.person_id| person_id | enc_number | enc_orig_site_id | orig_site_id | enc_atrb_orig_site |
|---|---|---|---|---|
| 00225091-B854-4B6D-8520-308C20E0FE2E | 32354540 | 11 | 7 | NO |
| 006589B1-A5F7-493D-A86A-C5F5A6FC3767 | 31211784 | 11 | 2 | NO |
| 008394B7-47A5-4206-A641-D7116DB12B3B | 32579131 | 11 | 11 | YES |
| 00BA341D-528C-465F-8C59-113DE481CBD1 | 28327946 | 11 | 11 | YES |
SELECT
e.enc_service_line_unscrubbed as specialty,
COUNT(DISTINCT e.enc_pat_drvs_number) as unique_patients
FROM
Encounter e
WHERE
e.enc_service_line_unscrubbed <> 'NULL' AND
e.enc_service_line_unscrubbed <> 'Other'
GROUP BY enc_service_line_unscrubbed
ORDER BY unique_patients DESC# Generate pie chart
pie_data <- q4_output[1:15,]
# Create plotly interactive pie chart
plot_ly(data = pie_data, labels = ~specialty, values = ~unique_patients, type = "pie",
hoverinfo = "label+value", textinfo = "percent", textposition = "inside",
insidetextfont = list(size = 14), marker = list(colors = rainbow(length(unique(pie_data$specialty))),
line = list(color = "#FFFFFF", width = 2))) %>%
layout(title = "Unique Patients by Specialty",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
showlegend = TRUE,
legend = list(orientation = "v", x = 1.1, y = 0.5))