Skip to content

Commit

Permalink
Merge pull request #65 from PNNL-CompBio/repo-restructure
Browse files Browse the repository at this point in the history
slight updates to fix column selection
  • Loading branch information
sgosline authored May 13, 2022
2 parents 1c8facf + 88b5d39 commit fd5ba0a
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 3 deletions.
12 changes: 10 additions & 2 deletions bmd2Samps/buildv1database.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ required_bmd_columns<-list(bmd=c('Chemical_ID','End_Point','Model','BMD10','BMD5
doseRep=c("Chemical_ID","End_Point","Dose","Response","CI_Lo","CI_Hi"),
fitVals=c("Chemical_ID","End_Point","X_vals","Y_vals"))


# Column names kept when the extract BMD table is trimmed with
# select(envSampSumOutput) in buildDB. Chemical_ID is not in this list.
envSampSumOutput <- c(
  # model fit / dose summary
  "Model", "BMD10", "BMD50", "Min_Dose", "Max_Dose", "AUC_Norm",
  # sample description
  "SampleNumber", "date_sampled", "sample_matrix", "technology",
  "Sample_ID", "ClientName", "SampleName",
  # sampling location
  "LocationLat", "LocationLon", "LocationName",
  "LocationAlternateDescription", "AlternateName",
  # endpoint description
  "End Point Name", "Description", "endPointLink", "DataQC_Flag",
  # project
  "projectName", "projectLink"
)
##################################
#Master ID tables
#The database requires Sample_ID and Chemical_ID be unique. They are in some files but not others
Expand Down Expand Up @@ -362,7 +368,8 @@ combineChemicalEndpointData<-function(bmdfiles,is_extract=FALSE,sampChem,endpoin
mid.bmd<-mid.bmd[-dupes,]
}
if(is_extract){
sdSamp<-sampChem%>%tidyr::separate('Sample_ID',into=c('tmpId','sub'),sep='-',remove=FALSE)%>%
sdSamp<-sampChem%>%
tidyr::separate('Sample_ID',into=c('tmpId','sub'),sep='-',remove=FALSE)%>%
select(-sub)

full.bmd<-mid.bmd%>%
Expand Down Expand Up @@ -666,6 +673,7 @@ buildDB<-function(chem.files=c(),extract.files=c()){

message('Processing extract response data')
ebmds<-combineChemicalEndpointData(e.bmd,is_extract=TRUE,sampChem,endpointDetails)%>%
select(envSampSumOutput)%>%
unique()
ecurves <- combineChemicalFitData(e.curve,is_extract=TRUE, sampChem,endpointDetails)%>%
unique()
Expand Down Expand Up @@ -737,7 +745,7 @@ buildDB<-function(chem.files=c(),extract.files=c()){
samp.eps<-sampChem%>%
subset(!measurement_value_qualifier%in%c("U","J"))%>%
dplyr::select(Sample_ID,LocationName,Chemical_ID)%>%distinct()%>%
full_join(ebmds,by=c('Sample_ID','LocationName','Chemical_ID'))%>%
full_join(ebmds,by=c('Sample_ID','LocationName'))%>%
select(c('Sample_ID','LocationName',Chemical_ID,'End Point Name','AUC_Norm'))%>%
group_by(LocationName)%>%
summarize(numSampls=n_distinct(Sample_ID),numChems=n_distinct(Chemical_ID),num_endpoints=n_distinct(`End Point Name`))
Expand Down
Binary file modified bmd2Samps/data/fses/FSES_indoor_outdoor_study.xlsx
Binary file not shown.
11 changes: 11 additions & 0 deletions chemCounts.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"chemical_class"|"End Points"|"Samples"|"Chemicals"
"aniline"|50|380|5
"dioxinsAndFurans"|20|160|2
"None"|0|102094|NA
"OPAH"|130|1041|13
"OPFR"|30|228|3
"PAH"|804|8787|79
"PBDE"|30|228|3
"phenol"|110|760|11
"Uncategorized"|652|4953|65
"Unclassified"|3710|43|345
2 changes: 1 addition & 1 deletion chemCounts.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
"PAH"|804|8787|79
"PBDE"|30|228|3
"phenol"|110|760|11
"Unclassified"|4180|4996|418
"Unclassified"|4362|4996|410

0 comments on commit fd5ba0a

Please sign in to comment.