A high performance caching library for Java
Cache
LoadingCache<Key, Graph> graphs = Caffeine.newBuilder()
    .maximumSize(10_000)
    .expireAfterWrite(Duration.ofMinutes(5))
    .refreshAfterWrite(Duration.ofMinutes(1))
    .build(key -> createExpensiveGraph(key));
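For reference, a minimal sketch of how the cache above might then be used; key and keys are placeholder values and createExpensiveGraph is the loader supplied to the builder:

// Lookup an entry, computing it through createExpensiveGraph if absent
Graph graph = graphs.get(key);

// Lookup a batch of entries, loading any that are missing
Map<Key, Graph> batch = graphs.getAll(keys);

// Discard an entry that is known to be stale
graphs.invalidate(key);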
Download
implementation 'com.github.ben-manes.caffeine:caffeine:3.0.5'
// Optional extensions
implementation 'com.github.ben-manes.caffeine:guava:3.0.5'
implementation 'com.github.ben-manes.caffeine:jcache:3.0.5'
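Not from the original page, but for comparison: the same library also offers a manual Cache (no loader) that is populated on demand. The createExpensiveGraph helper and the Key/Graph types are reused from the earlier snippet as placeholders:

Cache<Key, Graph> cache = Caffeine.newBuilder()
    .maximumSize(10_000)
    .expireAfterWrite(Duration.ofMinutes(10))
    .build();

// Lookup an entry, or null if not found
Graph graph = cache.getIfPresent(key);

// Lookup an entry, computing it if absent
graph = cache.get(key, k -> createExpensiveGraph(k));

// Insert or update an entry explicitly
cache.put(key, graph);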
grouped data select rows and include rows before/after
library(tidyverse)
df %>%
setDT(df) %>%
mutate(row_id = row_number()) %>%
group_by(PATIENT.ID) %>%
mutate(first_yes = cumsum(Caffeinefactor == "yes"),
last_no = (Caffeinefactor == "no") * row_number(),
last_no = ifelse(first_yes == 0, last_no, 0),
select_row = (first_yes == 1 & Caffeine > 0) | last_no == max(last_no)) %>%
filter(select_row | lag(select_row) | lead(select_row)) %>%
select(-first_yes, -last_no, -select_row)
PATIENT.ID Caffeine Caffeinefactor PULSE.WIDTH row_id
<int> <int> <chr> <dbl> <int>
1 210625 0 no 0.75 3
2 210625 0 no 0.75 4
3 210625 200 yes 0.75 5
4 210625 200 yes 0.75 6
5 221179 0 no 1 25
6 221179 0 no 1 26
7 221179 200 yes 1 27
8 221179 200 yes 1 28
9 301705 0 no 0.5 36
10 301705 0 no 0.5 37
11 301705 200 yes 0.5 38
12 301705 0 no 0.5 39
Can you still use a ConcurrentLinkedHashMap with Caffeine?
var data = new ConcurrentHashMap<String, Integer>();
var order = Collections.synchronizedMap(new LinkedHashMap<String, Integer>());

data.compute("a", (key, oldValue) -> {
    order.put(key, 1);
    return 1;
});

// Locks the map during access, blocking writes; consider snapshotting first
order.forEach((key, value) -> System.out.printf("%s=%s%n", key, value));

// Alternative: an immutable, insertion-ordered map (e.g. io.vavr.collection.LinkedHashMap)
// published through a volatile field
volatile Map<K, V> data = LinkedHashMap.empty();
final Lock lock = new ReentrantLock();

// Single writer
lock.lock();
try {
    data = data.put(1, 2).put(3, 4);
} finally {
    lock.unlock();
}

// Multiple readers
System.out.println(data);
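Not part of the original answers, but since the question asks about ConcurrentLinkedHashMap: a hedged sketch of using a size-bounded Caffeine cache in its place, exposing a thread-safe ConcurrentMap view via asMap(). The class name and the bound of 1_000 are illustrative:

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import java.util.concurrent.ConcurrentMap;

class BoundedMapExample {
    public static void main(String[] args) {
        // Caffeine grew out of ConcurrentLinkedHashMap and provides size-based eviction
        Cache<String, Integer> cache = Caffeine.newBuilder()
            .maximumSize(1_000)
            .build();

        // Thread-safe ConcurrentMap view backed by the cache.
        // Note: unlike a LinkedHashMap, iteration order of this view is unspecified.
        ConcurrentMap<String, Integer> map = cache.asMap();
        map.putIfAbsent("a", 1);
        System.out.println(map.get("a"));
    }
}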
Is there a Python DICOM Modality Worklist setup library?
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import ExplicitVRLittleEndian
ds = Dataset()
# Add file meta information elements
ds.file_meta = FileMetaDataset()
ds.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian
# Fill out the worklist query elements
ds.SpecificCharacterSet = "ISO_IR 6"
ds.ScheduledProcedureStepSequence = [Dataset()]
ds.ScheduledProcedureStepSequence[0].Modality = "CT"
# etc...
ds.save_as("query.wl", write_like_original=False)
-----------------------
import os
from os import path
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import ExplicitVRLittleEndian
wl_file_name = "directory/file.wl"
txt_file_name = "directory/file.txt"
# Create data set
ds = Dataset()
# Add file meta information elements
ds.file_meta = FileMetaDataset()
ds.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian
ds.file_meta.MediaStorageSOPClassUID = "0"
ds.file_meta.MediaStorageSOPInstanceUID = "0"
# Fill out the worklist query elements
ds.SpecificCharacterSet = "ISO_IR 6"
ds.InstanceCreationDate = "20220101"
ds.AccessionNumber = "12345-abc"
ds.PatientName = "SURNAME^NAME"
ds.PatientID = "123456"
ds.PatientBirthDate = "19700101"
ds.PatientSex = "M"
ds.StudyInstanceUID = "1a-2b-3c"
ds.RequestedProcedureDescription = "ProcedureDescription"
ds.ScheduledProcedureStepSequence = [Dataset()]
ds.ScheduledProcedureStepSequence[0].Modality = "OT"
ds.ScheduledProcedureStepSequence[0].ScheduledStationAETitle = "OT"
ds.ScheduledProcedureStepSequence[0].ScheduledProcedureStepStartDate = "20220101"
ds.ScheduledProcedureStepSequence[0].ScheduledProcedureStepStartTime = "080000"
ds.ScheduledProcedureStepSequence[0].ScheduledPerformingPhysicianName = "Doctor Emmet Brown"
ds.ScheduledProcedureStepSequence[0].ScheduledProcedureStepDescription = "SchedProcStepDesc"
ds.ScheduledProcedureStepID = "0001"
# more stuff if you need
# Save directly as a .wl file.
# Set write_like_original=False to be certain you’re writing the dataset in the DICOM File Format
ds.save_as(wl_file_name, write_like_original=False)
# Additionally, you can also make a readable txt file for humans
# Check if txt file already exists
if path.exists(txt_file_name):  # if the txt file already exists, remove it first
    try:
        os.remove(txt_file_name)
    except OSError as e:
        print("Error: %s : %s" % (txt_file_name, e.strerror))
# Run dcmdump command to convert wl file to txt
convert_wl_to_txt_cmd = "dcmdump " + wl_file_name + " > " + txt_file_name
os.system(convert_wl_to_txt_cmd)
# Dicom-File-Format
# Dicom-Meta-Information-Header
# Used TransferSyntax: Little Endian Explicit
(0002,0000) UL 120 # 4, 1 FileMetaInformationGroupLength
(0002,0001) OB 00\01 # 2, 1 FileMetaInformationVersion
(0002,0002) UI [0] # 2, 1 MediaStorageSOPClassUID
(0002,0003) UI [0] # 2, 1 MediaStorageSOPInstanceUID
(0002,0010) UI =LittleEndianExplicit # 20, 1 TransferSyntaxUID
(0002,0012) UI [1.2.826.0.1.3680043.8.498.1] # 28, 1 ImplementationClassUID
(0002,0013) SH [PYDICOM 2.2.2] # 14, 1 ImplementationVersionName
# Dicom-Data-Set
# Used TransferSyntax: Little Endian Explicit
(0008,0005) CS [ISO_IR 6] # 8, 1 SpecificCharacterSet
(0008,0012) DA [20220101] # 8, 1 InstanceCreationDate
(0008,0050) SH [12345-abc] # 10, 1 AccessionNumber
(0010,0010) PN [SURNAME^NAME] # 12, 1 PatientName
(0010,0020) LO [123456] # 6, 1 PatientID
(0010,0030) DA [19700101] # 8, 1 PatientBirthDate
(0010,0040) CS [M] # 2, 1 PatientSex
(0020,000d) UI [1a-2b-3c] # 8, 1 StudyInstanceUID
(0032,1060) LO [ProcedureDescription] # 20, 1 RequestedProcedureDescription
(0040,0009) SH [0001] # 4, 1 ScheduledProcedureStepID
(0040,0100) SQ (Sequence with explicit length #=1) # 110, 1 ScheduledProcedureStepSequence
(fffe,e000) na (Item with explicit length #=6) # 102, 1 Item
(0008,0060) CS [OT] # 2, 1 Modality
(0040,0001) AE [OT] # 2, 1 ScheduledStationAETitle
(0040,0002) DA [20220101] # 8, 1 ScheduledProcedureStepStartDate
(0040,0003) TM [080000] # 6, 1 ScheduledProcedureStepStartTime
(0040,0006) PN [Doctor Emmet Brown] # 18, 1 ScheduledPerformingPhysicianName
(0040,0007) LO [SchedProcStepDesc] # 18, 1 ScheduledProcedureStepDescription
(fffe,e00d) na (ItemDelimitationItem for re-encoding) # 0, 0 ItemDelimitationItem
(fffe,e0dd) na (SequenceDelimitationItem for re-encod.) # 0, 0 SequenceDelimitationItem
Caching (Caffeine) & Spring - two caches based on method parameter value
@Caching(cacheable = {
@Cacheable(cacheNames = "someDataCache1", condition = "#group.equals('group1')"),
@Cacheable(cacheNames = "someDataCache2", condition = "#group.equals('group2')")
})
public List<String> getSomeData(String group) {
return someService.getSomeDataForGroup(group);
}
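A hedged sketch (not part of the original answer) of one way to back both cache names with Caffeine in a Spring configuration; the sizing and expiry values are illustrative:

import com.github.benmanes.caffeine.cache.Caffeine;
import org.springframework.cache.CacheManager;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cache.caffeine.CaffeineCacheManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.time.Duration;

@Configuration
@EnableCaching
class CacheConfig {

    @Bean
    CacheManager cacheManager() {
        // Register both cache names so the @Cacheable conditions above can route to them
        CaffeineCacheManager manager =
            new CaffeineCacheManager("someDataCache1", "someDataCache2");
        manager.setCaffeine(Caffeine.newBuilder()
            .maximumSize(1_000)
            .expireAfterWrite(Duration.ofMinutes(10)));
        return manager;
    }
}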
How can I use the ggplot function to visualise grouped data?
library(ggplot2)
library(dplyr)
tidied_data_2 <- read.table(text = "participant condition response_time caffeine
1 1 Normal 984 1
2 2 Normal 1005 1
3 3 Normal 979 3
4 4 Normal 1040 2
5 5 Normal 1008 2
6 6 Normal 979 3", head = TRUE)
tidied_data_2 %>%
ggplot(aes(x = as.character(caffeine), y = response_time, colour = as.character(caffeine))) +
## geom_violin does not make sense with so few observations
# geom_violin() +
## I've removed alpha so you can see the dots better
geom_jitter(width = .1) +
guides(colour = FALSE) +
stat_summary(fun.data = "mean_cl_boot", colour = "black") +
theme_minimal() +
theme(text = element_text(size = 13)) +
labs(x = "Condition X Caffeine", y = "Response Time (ms)")
tidied_data_2 %>%
## in this example as.integer(as.character(x)) is unnecessary, but it is necessary for your data sample
ggplot(aes(x = as.integer(as.character(caffeine)), y = response_time)) +
geom_jitter(width = .1) +
theme_minimal()
How to alternate actions on variables in an array (one of two things for every other variable)
$policies = @('A', 'B')
$pickSecond = $false
foreach($SpecificEndpoint in $EndpointList)
{
# pick policy
$policy = $policies[$pickSecond]
# toggle `pickSecond` for next time
$pickSecond = -not $pickSecond
$SpecificEndpoint.policyId = $policy
# ... perform API call
}
How can I extract bigrams from text without removing the hash symbol?
library(tidyverse)
library(tidytext)
library(tm)
library(purrr)
x <- (c("I went to afternoon tea with her majesty and #queen @Victoria in the palace.", "Does tea have extra caffeine?"))
clean_Twitter_Corpus <- function(x) {
x = tolower(x) # convert to lower case characters
x = stripWhitespace(x) # removing white space
x = gsub("^\\s+|\\s+$", "", x) # remove leading and trailing white space
x = removeWords(x,stopwords("english")) # remove stopwords
return(x)
}
# A custom build function that will take in a sentence and create
# a tibble of ngrams
ngrams_build = function(sentence, column_name, n = 2) {
words <- sentence %>% str_split(pattern = " ", simplify = TRUE)
words <- words[words != ""]
ngrams <- map_chr(1:(length(words) - n + 1),
.f = function(x, words, n) {
paste(words[x:(x + n - 1)], collapse = " ")
}, words = words, n = n)
tibble(!!column_name := ngrams)
}
# clean the twitter texts. call the clean_Twitter_Corpus function
tweets <- clean_Twitter_Corpus(x)
tweets
#> [1] " went afternoon tea majesty #queen @victoria palace."
#> [2] " tea extra caffeine?"
text <- as.character(tweets)
text <- as.data.frame(text)
tidy_descr_ngrams <-
# here I use purrr function with the custom function
map_dfr(text$text, ngrams_build, column_name = "bigram", n = 2) %>%
separate(bigram, c("word1", "word2"), sep = " ")
# Here is the output which is similar to unnest_tokens but has special
# character included
tidy_descr_ngrams
#> # A tibble: 8 x 2
#> word1 word2
#> <chr> <chr>
#> 1 went afternoon
#> 2 afternoon tea
#> 3 tea majesty
#> 4 majesty #queen
#> 5 #queen @victoria
#> 6 @victoria palace.
#> 7 tea extra
#> 8 extra caffeine?
bigram_counts <- tidy_descr_ngrams %>%
count(word1, word2, sort = TRUE)
bigram_counts
#> # A tibble: 8 x 3
#> word1 word2 n
#> <chr> <chr> <int>
#> 1 #queen @victoria 1
#> 2 @victoria palace. 1
#> 3 afternoon tea 1
#> 4 extra caffeine? 1
#> 5 majesty #queen 1
#> 6 tea extra 1
#> 7 tea majesty 1
#> 8 went afternoon 1
Explanations in Consistent OWL Ontologies
import java.io.File;
import java.util.Set;
import java.util.function.Supplier;
import org.junit.Test;
import org.semanticweb.HermiT.ReasonerFactory;
import org.semanticweb.owl.explanation.api.Explanation;
import org.semanticweb.owl.explanation.api.ExplanationGenerator;
import org.semanticweb.owl.explanation.api.ExplanationGeneratorFactory;
import org.semanticweb.owl.explanation.api.ExplanationProgressMonitor;
import org.semanticweb.owl.explanation.impl.blackbox.Configuration;
import org.semanticweb.owl.explanation.impl.blackbox.DivideAndConquerContractionStrategy;
import org.semanticweb.owl.explanation.impl.blackbox.EntailmentCheckerFactory;
import org.semanticweb.owl.explanation.impl.blackbox.InitialEntailmentCheckStrategy;
import org.semanticweb.owl.explanation.impl.blackbox.StructuralTypePriorityExpansionStrategy;
import org.semanticweb.owl.explanation.impl.blackbox.checker.BlackBoxExplanationGeneratorFactory;
import org.semanticweb.owl.explanation.impl.blackbox.checker.SatisfiabilityEntailmentCheckerFactory;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
public class CheckOntology {
@Test
public void should() throws Exception {
OWLOntologyManager m = OWLManager.createOWLOntologyManager();
OWLOntology o = m.loadOntologyFromOntologyDocument(new File("pizza.owl"));
OWLReasonerFactory rf = new ReasonerFactory(); // Get hold of a reasoner factory
// Create the explanation generator factory which uses reasoners provided by the specified
// reasoner factory
ExplanationGeneratorFactory<OWLAxiom> genFac =
createExplanationGeneratorFactory(rf, null, OWLManager::createOWLOntologyManager);
// Now create the actual explanation generator for our ontology
ExplanationGenerator<OWLAxiom> gen = genFac.createExplanationGenerator(o);
// Ask for explanations for some entailment
// Get a reference to the axiom that represents the entailment that we want explanation for
// this will just run the explanations for all axioms
o.logicalAxioms().forEach(e -> explain(e, gen));
}
void explain(OWLAxiom entailment, ExplanationGenerator<OWLAxiom> gen) {
// Get our explanations. Ask for a maximum of 5.
try {
Set<Explanation<OWLAxiom>> expl = gen.getExplanations(entailment, 5);
System.out.println("CheckOntology.explain() " + entailment);
expl.forEach(System.out::println);
} catch (Exception e) {
e.printStackTrace();
}
}
// this method replicates code existing in the owlexplanation project; it's needed because the factories in owlexplanation do not set InitialEntailmentCheckStrategy correctly
public static ExplanationGeneratorFactory<OWLAxiom> createExplanationGeneratorFactory(
OWLReasonerFactory reasonerFactory, ExplanationProgressMonitor<OWLAxiom> progressMonitor,
Supplier<OWLOntologyManager> m) {
EntailmentCheckerFactory<OWLAxiom> checker =
new SatisfiabilityEntailmentCheckerFactory(reasonerFactory, m);
Configuration<OWLAxiom> config = new Configuration<>(checker,
new StructuralTypePriorityExpansionStrategy<OWLAxiom>(
InitialEntailmentCheckStrategy.PERFORM, m),
new DivideAndConquerContractionStrategy<OWLAxiom>(), progressMonitor, m);
return new BlackBoxExplanationGeneratorFactory<>(config);
}
}
Spring boot + Caffeine cache + Check header
@Component
public class CheckHeaderInterceptor implements HandlerInterceptor {

    @Override
    public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler)
            throws Exception {
        // Validate whatever you need here; return false (optionally setting an error status
        // on the response) to reject the request, or true to let it reach the controller.
        return true;
    }
}

@Configuration
public class WebMvcConfig implements WebMvcConfigurer {

    @Autowired
    private CheckHeaderInterceptor interceptor;

    @Override
    public void addInterceptors(InterceptorRegistry registry) {
        registry.addInterceptor(interceptor)
            .addPathPatterns("/paths/you/want/handled/**"); // adjust the pattern to your endpoints
    }
}
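The question title also mentions a Caffeine cache. As a purely illustrative, hedged sketch (not from the answer), the interceptor could consult a Caffeine cache of recently validated header values before doing the expensive check; the header name and the isValid helper are assumptions:

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import jakarta.servlet.http.HttpServletRequest;   // use javax.servlet.http on Spring Boot 2.x
import jakarta.servlet.http.HttpServletResponse;
import org.springframework.stereotype.Component;
import org.springframework.web.servlet.HandlerInterceptor;
import java.time.Duration;

@Component
public class CachingHeaderInterceptor implements HandlerInterceptor {

    // Remembers recently validated header values to skip repeated checks
    private final Cache<String, Boolean> validated = Caffeine.newBuilder()
        .maximumSize(10_000)
        .expireAfterWrite(Duration.ofMinutes(5))
        .build();

    @Override
    public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) {
        String token = request.getHeader("X-Api-Key"); // header name is an assumption
        if (token == null) {
            response.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
            return false;
        }
        boolean ok = validated.get(token, this::isValid);
        if (!ok) {
            response.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
        }
        return ok;
    }

    // Placeholder for the real validation logic (hypothetical)
    private boolean isValid(String token) {
        return !token.isBlank();
    }
}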
Using two dataframes how can I compare a lookup value as a substring in the column in another dataframe to create a new column if the match exists
pattern = '|'.join(df1['Ingredient_Name'].tolist())
out = df2['Ingredient_Name'].str.findall(pattern).apply(pd.Series)
out.columns = 'Ingredient_Name_' + (out.columns + 1).astype(str)
out = df2.join(out)
print(out)
# Output:
Ingredient_Name WordCount Num_Of_Ingredients \
0 ACETAMINOPHEN ACETYLSALICYLIC ACID CAFFEINE 4 3
1 ACEBUTOLOL ACETYLSALICYLIC ACID 3 2
2 COLISTIN HYDROCORTISONE NEOMYCIN THONZONIUM BROMIDE 5 4
3 BROMIDE 1 1
Ingredient_Name_1 Ingredient_Name_2 Ingredient_Name_3 Ingredient_Name_4
0 ACETAMINOPHEN ACETYLSALICYLIC ACID CAFFEINE NaN
1 ACEBUTOLOL ACETYLSALICYLIC ACID NaN NaN
2 COLISTIN HYDROCORTISONE NEOMYCIN THONZONIUM BROMIDE
3 BROMIDE NaN NaN NaN
-----------------------
output = df2['Ingredient_Name'].str.extractall(f"({'|'.join(df1['Ingredient_Name'])})").unstack()
#formatting
output = output.droplevel(0,1).rename_axis(None, axis=1).add_prefix("Ingredient_Name_")
>>> output
Ingredient_Name_0 Ingredient_Name_1 Ingredient_Name_2 Ingredient_Name_3
0 ACETAMINOPHEN ACETYLSALICYLIC ACID CAFFEINE NaN
1 ACEBUTOLOL ACETYLSALICYLIC ACID NaN NaN
2 COLISTIN HYDROCORTISONE NEOMYCIN THONZONIUM BROMIDE
3 BROMIDE NaN NaN NaN
-----------------------
def match_ingredients(row, df):
base_str = row['Ingredient_Name']
result_count = 1
result = {}
for idx, ingredient in df.iterrows():
if ingredient['Ingredient_Name'] in base_str:
result[f'Ingredient_{result_count}'] = ingredient['Ingredient_Name']
result_count += 1
base_str = base_str.replace(ingredient['Ingredient_Name'], "")
result['Ingredient_Name'] = base_str
return result
result = df2.apply(match_ingredients,axis=1, result_type='expand', args=(df1,))
QUESTION
grouped data select rows and include rows before/after
Asked 2022-Mar-25 at 11:37
I have grouped data and want to select the rows that fulfill a certain condition (this works with the code provided below), but I also want to include the row before and the row after each selected row (so: match a row against the criteria, then also take one row up and one row down from the original dataset). The code below gives me two rows per group, the rows that match my criteria. I now also want to include the rows before and after those selected rows.
I tried the following code, but the line that should produce this output, desired_result[which(desired_result$Caffeinefactor == "yes") + c(-1:1), ], does not work:
daf1 <- df %>%
setDT(df) %>%
dplyr::mutate(row_id = row_number()) %>%
dplyr::group_by(PATIENT.ID) %>%
dplyr::mutate(first_yes = cumsum(Caffeinefactor == "yes"),
last_no = (Caffeinefactor == "no") * row_number(),
last_no = ifelse(first_yes == 0, last_no, 0)) %>%
dplyr:: filter((first_yes == 1 & Caffeine >0) | last_no == max(last_no)) %>%
desired_result[which(desired_result$Caffeinefactor == "yes") + c(-1:1), ] %>%
dplyr::select(-first_yes, -last_no)
structure(list(PATIENT.ID = c(210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 210625L,
210625L, 210625L, 210625L, 210625L, 210625L, 210625L, 221179L,
221179L, 221179L, 221179L, 221179L, 221179L, 221179L, 221179L,
221179L, 221179L, 221179L, 221179L, 221179L, 221179L, 301705L,
301705L, 301705L, 301705L, 301705L, 301705L, 301705L, 301705L,
301705L), Caffeine = c(0L, 0L, 0L, 0L, 200L, 200L, 200L, 0L,
200L, 200L, 200L, 200L, 200L, 0L, 0L, 200L, 200L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 200L, 200L, 400L, 400L, 400L, 0L, 0L,
0L, 0L, 0L, 0L, 200L, 0L, 200L), Caffeinefactor = c("no", "no",
"no", "no", "yes", "yes", "yes", "no", "yes", "yes", "yes", "yes",
"yes", "no", "no", "yes", "yes", "no", "no", "no", "no", "no",
"no", "no", "no", "no", "yes", "yes", "yes", "yes", "yes", "no",
"no", "no", "no", "no", "no", "yes", "no", "yes"), PULSE.WIDTH = c(0.5,
0.5, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75,
0.75, 0.5, 0.5, 0.75, 1, 0.5, 0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)), class = c("data.table",
"data.frame"), row.names = c(NA, -40L), groups = structure(list(
PATIENT.ID = c(210625L, 221179L, 301705L, 303926L, 309668L,
312580L, 313644L, 316332L, 326693L, 336204L, 337968L, 340160L,
341787L, 343627L, 346128L, 349800L, 351701L, 352235L, 354458L,
356470L, 357583L, 358002L, 358447L, 359628L, 861559L, 875452L,
876598L, 877257L, 877535L, 878250L, 878781L, 880014L, 880911L,
881002L, 881972L, 882667L, 883375L, 883799L, 884914L, 885233L,
885802L, 889623L, 891145L, 892464L, 893308L, 895449L), .rows = structure(list(
1:17, 18:31, 32:46, 47:53, 54:61, 62:72, 73:95, 96:107,
108:131, 132:146, 147:165, 166:174, 175:186, 187:191,
192:203, 204:215, 216:227, 228:244, 245:263, 264:278,
279:290, 291:301, 302:313, 314:326, 327:350, 351:365,
366:376, 377:389, 390:399, 400:413, 414:424, 425:445,
446:453, 454:466, 467:477, 478:494, 495:508, 509:517,
518:524, 525:534, 535:554, 555:557, 558:573, 574:585,
586:593, 594:598), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -46L), .drop = TRUE), .internal.selfref = <pointer: 0x7fe7f7002ee0>)
This is what the output looks like now, without the line desired_result[which(desired_result$Caffeinefactor == "yes") + c(-1:1), ]:
PATIENT.ID | Caffeinefactor | PULSE.WIDTH |
---|---|---|
210625L | no | output |
210625L | yes | output |
220909L | no | output |
220909L | yes | output |
301705L | no | output |
301705L | yes | output |
The output should look like this if one row is added before and after:
PATIENT.ID | Caffeinefactor | PULSE.WIDTH |
---|---|---|
210625L | no | output |
210625L | no | output |
210625L | yes | output |
210625L | yes | output |
220909L | no | output |
220909L | yes | output |
220909L | yes | output |
301705L | no | output |
301705L | no | output |
301705L | yes | output |
301705L | yes | output |
... I put "output" for PULSE.WIDTH because I do not know what the exact value would be.
The original dataset has more than 90 columns and I also need to keep the values in those columns. I do not want to duplicate the rows that the shown code selected.
ANSWER
Answered 2022-Mar-25 at 11:37
Would you consider creating a column to indicate which rows you wish to retain, then using filter on the selected rows together with lead and lag to keep the rows before and after them?
library(tidyverse)
df %>%
setDT(df) %>%
mutate(row_id = row_number()) %>%
group_by(PATIENT.ID) %>%
mutate(first_yes = cumsum(Caffeinefactor == "yes"),
last_no = (Caffeinefactor == "no") * row_number(),
last_no = ifelse(first_yes == 0, last_no, 0),
select_row = (first_yes == 1 & Caffeine > 0) | last_no == max(last_no)) %>%
filter(select_row | lag(select_row) | lead(select_row)) %>%
select(-first_yes, -last_no, -select_row)
Output
PATIENT.ID Caffeine Caffeinefactor PULSE.WIDTH row_id
<int> <int> <chr> <dbl> <int>
1 210625 0 no 0.75 3
2 210625 0 no 0.75 4
3 210625 200 yes 0.75 5
4 210625 200 yes 0.75 6
5 221179 0 no 1 25
6 221179 0 no 1 26
7 221179 200 yes 1 27
8 221179 200 yes 1 28
9 301705 0 no 0.5 36
10 301705 0 no 0.5 37
11 301705 200 yes 0.5 38
12 301705 0 no 0.5 39
Community Discussions and Code Snippets contain sources that include the Stack Exchange Network.