Package 'samplingin' reference manual

Title:	Dynamic Survey Sampling Solutions
Description:	A robust solution employing SRS (Simple Random Sampling), systematic and PPS (Probability Proportional to Size) sampling methods, ensuring a methodical and representative selection of data. Seamlessly allocate predetermined allocations to smaller levels.
Authors:	Choerul Afifanto [aut, cre, cph]
Maintainer:	Choerul Afifanto <[email protected]>
License:	MIT + file LICENSE
Version:	1.0.7
Built:	2025-03-07 03:33:36 UTC
Source:	https://github.com/choerulafifanto/samplingin

Example of Allocation Data

Description

Example of Allocation Data for Sampling Purposes

Usage

alokasi_dt
alokasi_dt

Format

`alokasi_dt`

A data frame with 34 rows and 3 columns:

kdprov: province code
jml_kabkota: Population or number of regencies/cities
n_primary: Sample Allocation

...

Select Samples Given its Parameters

Description

Sample selection using the simple random sampling (SRS), systematic, or PPS (Probability Proportional to Size) sampling method.

Usage

doSampling(
  pop,
  alloc,
  nsample,
  type,
  strata = NULL,
  ident = c("kdprov", "kdkab"),
  implicitby = NULL,
  method = "systematic",
  auxVar = NA,
  seed = 1,
  predetermined_rn = NULL,
  is_secondary = FALSE,
  verbose = TRUE
)
doSampling(
  pop,
  alloc,
  nsample,
  type,
  strata = NULL,
  ident = c("kdprov", "kdkab"),
  implicitby = NULL,
  method = "systematic",
  auxVar = NA,
  seed = 1,
  predetermined_rn = NULL,
  is_secondary = FALSE,
  verbose = TRUE
)

Arguments

`pop`	population data frame
`alloc`	allocation dataframe
`nsample`	name of the variable in the allocation data frame that holds the sample allocation
`type`	type value for sample classification ('U' = Primary Samples, 'P' = Secondary Samples)
`strata`	strata variable; must be available in both the population and allocation data frames
`ident`	variables used to group the allocation data frame
`implicitby`	variable used as implicit stratification
`method`	sampling method: `"systematic"` (the default), `"srs"` or `"pps"`
`auxVar`	auxiliary variable for pps sampling (`method = "pps"`)
`seed`	seed for random number generation (`1` as default)
`predetermined_rn`	name of the variable in the allocation data frame that holds predetermined random numbers; the default is `NULL`, in which case the random numbers are generated automatically
`is_secondary`	if `TRUE`, existing primary samples are kept and only units that have not yet been selected as samples are eligible for selection (`FALSE` as default)
`verbose`	verbose (`TRUE` as default)

Value

list of population data ("pop"), selected samples ("sampledf"), and details of sampling process ("details")

Examples



library(samplingin)
library(magrittr)
library(dplyr)

# Simple Random Sampling (SRS)
dtSampling_srs = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "srs"
   , auxVar    = "Total"
   , seed      = 7892
)

# Population data with flag sample
pop_dt = dtSampling_srs$pop

# Selected Samples
dsampel = dtSampling_srs$sampledf

# Details of sampling process
rincian = dtSampling_srs$details

# PPS Sampling
dtSampling_pps = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "pps"
   , auxVar    = "Total"
   , seed      = 1234
)

# Population data with flag sample
pop_dt = dtSampling_pps$pop

# Selected Samples
dsampel = dtSampling_pps$sampledf

# Details of sampling process
rincian = dtSampling_pps$details

# Systematic Sampling
dtSampling_sys = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "systematic"
   , seed      = 4321
)

# Population data with flag sample
pop_dt = dtSampling_sys$pop

# Selected Samples
dsampel = dtSampling_sys$sampledf

# Details of sampling process
rincian = dtSampling_sys$details

# Systematic Sampling (Secondary Samples)

alokasi_dt_p = alokasi_dt %>%
   mutate(n_secondary = 2 * n_primary)

dtSampling_sys_p = doSampling(
   pop           = dtSampling_sys$pop
   , alloc       = alokasi_dt_p
   , nsample     = "n_secondary"
   , type        = "P"
   , ident       = c("kdprov")
   , method      = "systematic"
   , seed        = 6789
   , is_secondary = TRUE
)

# Population data with flag sample
pop_dt = dtSampling_sys_p$pop

# Selected Samples
dsampel = dtSampling_sys_p$sampledf

# Details of sampling process
rincian = dtSampling_sys_p$details

# Systematic Sampling with predetermined random number (predetermined_rn parameter)

alokasi_dt_rn = alokasi_dt %>% rowwise() %>% mutate(ar = runif(n(),0,1)) %>% ungroup

dtSampling_sys = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt_rn
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "systematic"
   , predetermined_rn = "ar"
   , seed      = 4321
)

# Population data with flag sample
pop_dt = dtSampling_sys$pop

# Selected Samples
dsampel = dtSampling_sys$sampledf

# Details of sampling process
rincian = dtSampling_sys$details

library(samplingin)
library(magrittr)
library(dplyr)

# Simple Random Sampling (SRS)
dtSampling_srs = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "srs"
   , auxVar    = "Total"
   , seed      = 7892
)

# Population data with flag sample
pop_dt = dtSampling_srs$pop

# Selected Samples
dsampel = dtSampling_srs$sampledf

# Details of sampling process
rincian = dtSampling_srs$details

# PPS Sampling
dtSampling_pps = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "pps"
   , auxVar    = "Total"
   , seed      = 1234
)

# Population data with flag sample
pop_dt = dtSampling_pps$pop

# Selected Samples
dsampel = dtSampling_pps$sampledf

# Details of sampling process
rincian = dtSampling_pps$details

# Systematic Sampling
dtSampling_sys = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "systematic"
   , seed      = 4321
)

# Population data with flag sample
pop_dt = dtSampling_sys$pop

# Selected Samples
dsampel = dtSampling_sys$sampledf

# Details of sampling process
rincian = dtSampling_sys$details

# Systematic Sampling (Secondary Samples)

alokasi_dt_p = alokasi_dt %>%
   mutate(n_secondary = 2 * n_primary)

dtSampling_sys_p = doSampling(
   pop           = dtSampling_sys$pop
   , alloc       = alokasi_dt_p
   , nsample     = "n_secondary"
   , type        = "P"
   , ident       = c("kdprov")
   , method      = "systematic"
   , seed        = 6789
   , is_secondary = TRUE
)

# Population data with flag sample
pop_dt = dtSampling_sys_p$pop

# Selected Samples
dsampel = dtSampling_sys_p$sampledf

# Details of sampling process
rincian = dtSampling_sys_p$details

# Systematic Sampling with predetermined random number (predetermined_rn parameter)

alokasi_dt_rn = alokasi_dt %>% rowwise() %>% mutate(ar = runif(n(),0,1)) %>% ungroup

dtSampling_sys = doSampling(
   pop         = pop_dt
   , alloc     = alokasi_dt_rn
   , nsample   = "n_primary"
   , type      = "U"
   , ident     = c("kdprov")
   , method    = "systematic"
   , predetermined_rn = "ar"
   , seed      = 4321
)

# Population data with flag sample
pop_dt = dtSampling_sys$pop

# Selected Samples
dsampel = dtSampling_sys$sampledf

# Details of sampling process
rincian = dtSampling_sys$details

Allocate Predetermined Allocations to Smaller Levels

Description

Allocate predetermined allocations to smaller levels using the proportional allocation method.

Usage

get_allocation(data, n_alloc, group, pop_var = "jml", secondary = 0)
get_allocation(data, n_alloc, group, pop_var = "jml", secondary = 0)

Arguments

`data`	population tabulation dataframe
`n_alloc`	total allocation dataframe
`group`	group of allocation level to be obtained
`pop_var`	population variable in data
`secondary`	multiplier giving how many times larger the secondary sample is than the primary sample

Value

allocation at more detailed level

Examples


library(samplingin)
library(magrittr)

contoh_alokasi = alokasi_dt %>%
   dplyr::select(-n_primary) %>%
   dplyr::mutate(nasional = 1)

alokasi_dt = get_allocation(
   data = contoh_alokasi
   , n_alloc = 100
   , group = c("nasional")
   , pop_var = "jml_kabkota"
)

library(samplingin)
library(magrittr)

contoh_alokasi = alokasi_dt %>%
   dplyr::select(-n_primary) %>%
   dplyr::mutate(nasional = 1)

alokasi_dt = get_allocation(
   data = contoh_alokasi
   , n_alloc = 100
   , group = c("nasional")
   , pop_var = "jml_kabkota"
)

Indonesian Population (SP2020)

Description

Tabulation of Indonesia's population based on the results of the 2020 population census by regency/city and gender

Usage

pop_dt
pop_dt

Format

`pop_dt`

A data frame with 514 rows and 8 columns:

idkab: region id
kdprov: province code
kdkab: regency/city code
nmprov: province name
nmkab: regency/city name
Laki-laki: Male Population
Perempuan: Female Population
Total: Total Population

...

Source

https://sensus.bps.go.id/main/index/sp2020

round_preserve_sum

Description

round_preserve_sum

Usage

round_preserve_sum(x, digits = 0)
round_preserve_sum(x, digits = 0)

Arguments

`x`	a number
`digits`	0 (default)

Package 'samplingin'

Help Index

Example of Allocation Data

Description

Usage

Format

alokasi_dt

Select Samples Given its Parameters

Description

Usage

Arguments

Value

Examples

Allocate Predetermined Allocations to Smaller Levels

Description

Usage

Arguments

Value

Examples

Indonesian Population (SP2020)

Description

Usage

Format

pop_dt

Source

round_preserve_sum

Description

Usage

Arguments

`alokasi_dt`

`pop_dt`