Module: HTS::LibHTS

Extended by:
FFI::Library
Defined in:
lib/hts/libhts.rb,
lib/hts/libhts/hts.rb,
lib/hts/libhts/sam.rb,
lib/hts/libhts/tbx.rb,
lib/hts/libhts/vcf.rb,
lib/hts/libhts/bgzf.rb,
lib/hts/libhts/faidx.rb,
lib/hts/libhts/hfile.rb,
lib/hts/libhts/kfunc.rb,
lib/hts/libhts/constants.rb,
lib/hts/libhts/sam_funcs.rb,
lib/hts/libhts/vcf_funcs.rb,
lib/hts/libhts/thread_pool.rb

Defined Under Namespace

Classes: BGZF, Bam1, Bam1Core, BamPileup1, BamPileupCd, Bcf1, BcfDec, BcfFmt, BcfHdr, BcfHrec, BcfIdinfo, BcfIdpair, BcfInfo, BcfVariant, Faidx, HtsFile, HtsFormat, HtsIdx, HtsItr, HtsOpt, HtsTpool, KSeq, KString, SamHdr, Tbx, TbxConf

Constant Summary collapse

BAM_CMATCH =

constants

0
BAM_CINS =
1
BAM_CDEL =
2
BAM_CREF_SKIP =
3
BAM_CSOFT_CLIP =
4
BAM_CHARD_CLIP =
5
BAM_CPAD =
6
BAM_CEQUAL =
7
BAM_CDIFF =
8
BAM_CBACK =
9
BAM_CIGAR_STR =
"MIDNSHP=XB"
BAM_CIGAR_SHIFT =
4
BAM_CIGAR_MASK =
0xf
BAM_CIGAR_TYPE =
0x3C1A7
BAM_FPAIRED =
1
BAM_FPROPER_PAIR =
2
BAM_FUNMAP =
4
BAM_FMUNMAP =
8
BAM_FREVERSE =
16
BAM_FMREVERSE =
32
BAM_FREAD1 =
64
BAM_FREAD2 =
128
BAM_FSECONDARY =
256
BAM_FQCFAIL =
512
BAM_FDUP =
1024
BAM_FSUPPLEMENTARY =
2048
BCF_HL_FLT =

constants

0
BCF_HL_INFO =

header line

1
BCF_HL_FMT =
2
BCF_HL_CTG =
3
BCF_HL_STR =

structured header line TAG=<A=..,B=..>

4
BCF_HL_GEN =

generic header line

5
BCF_HT_FLAG =

header type

0
BCF_HT_INT =
1
BCF_HT_REAL =
2
BCF_HT_STR =
3
BCF_HT_LONG =

BCF_HT_INT, but for int64_t values; VCF only!

(BCF_HT_INT | 0x100)
BCF_VL_FIXED =

variable length

0
BCF_VL_VAR =
1
BCF_VL_A =
2
BCF_VL_G =
3
BCF_VL_R =
4
BCF_DT_ID =

dictionary type

0
BCF_DT_CTG =
1
BCF_DT_SAMPLE =
2
BCF_BT_NULL =
0
BCF_BT_INT8 =
1
BCF_BT_INT16 =
2
BCF_BT_INT32 =
3
BCF_BT_INT64 =

Unofficial, for internal use only.

4
BCF_BT_FLOAT =
5
BCF_BT_CHAR =
7
VCF_REF =
0
VCF_SNP =
1
VCF_MNP =
2
VCF_INDEL =
4
VCF_OTHER =
8
VCF_BND =

breakend

16
VCF_OVERLAP =

overlapping deletion, ALT=*

32
BCF1_DIRTY_ID =
1
BCF1_DIRTY_ALS =
2
BCF1_DIRTY_FLT =
4
BCF1_DIRTY_INF =
8
BCF_ERR_CTG_UNDEF =
1
BCF_ERR_TAG_UNDEF =
2
BCF_ERR_NCOLS =
4
BCF_ERR_LIMITS =
8
BCF_ERR_CHAR =
16
BCF_ERR_CTG_INVALID =
32
BCF_ERR_TAG_INVALID =
64
BCF_UN_STR =

up to ALT inclusive

1
BCF_UN_FLT =

up to FILTER

2
BCF_UN_INFO =

up to INFO

4
BCF_UN_SHR =

all shared information

(BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO)
BCF_UN_FMT =

unpack format and each sample

8
BCF_UN_IND =

a synonym of BCF_UN_FMT

BCF_UN_FMT
BCF_UN_ALL =

everything

(BCF_UN_SHR | BCF_UN_FMT)
HtsLogLevel =

hts

enum(
  :off,            # All logging disabled.
  :error,          # Logging of errors only.
  :warning, 3,     # Logging of errors and warnings.
  :info,           # Logging of errors, warnings, and normal but significant events.
  :debug,          # Logging of all except the most detailed debug events.
  :trace           # All logging enabled.
)
# Enum of broad file-format categories, built with `enum` (this module extends
# FFI::Library). Only :category_maximum carries an explicit integer (32_767).
# NOTE(review): the other symbols presumably take consecutive values from 0 in
# the order written, so entries must not be reordered — confirm against FFI docs.
HtsFormatCategory =
enum(
  :unknown_category,
  :sequence_data,    # Sequence data -- SAM, BAM, CRAM, etc
  :variant_data,     # Variant calling data -- VCF, BCF, etc
  :index_file,       # Index file associated with some data file
  :region_list,      # Coordinate intervals or regions -- BED, etc
  :category_maximum, 32_767
)
# Enum of exact file formats, built with `enum` (this module extends FFI::Library).
#
# The position of each symbol is its integer value, so the list has to match
# htslib's `enum htsExactFormat` entry for entry. In htslib `json` is a
# deprecated alias declared as `json = htsget`; it does not occupy a slot of
# its own. Listing it as a separate entry made every later symbol
# (:empty_format .. :hts_crypt4gh_format) one larger than the C value.
#
# :json is kept for backward compatibility but pinned to 14, the same value as
# :htsget. :htsget is written after :json so that, assuming the reverse
# (integer -> symbol) map keeps the last symbol seen for a duplicated value,
# 14 still decodes to :htsget — TODO confirm against the FFI::Enum implementation.
HtsExactFormat =
enum(
  :unknown_format,
  :binary_format, :text_format,
  :sam, :bam, :bai, :cram, :crai, :vcf, :bcf, :csi, :gzi, :tbi, :bed,
  :json, 14,   # deprecated alias of :htsget
  :htsget, 14,
  :empty_format,
  :fasta_format, :fastq_format, :fai_format, :fqi_format,
  :hts_crypt4gh_format,
  :format_maximum, 32_767
)
# Enum of compression kinds, built with `enum` (this module extends FFI::Library).
# Only :compression_maximum carries an explicit integer (32_767).
# NOTE(review): the other symbols presumably take consecutive values from 0 in
# the order written, so entries must not be reordered — confirm against FFI docs.
HtsCompression =
enum(
  :no_compression, :gzip, :bgzf, :custom,
  :compression_maximum, 32_767
)
# Enum of format options, built with `enum` (this module extends FFI::Library).
# The CRAM_OPT_* symbols come first with no explicit integers; the general
# purpose HTS_OPT_* group starts at an explicit 100 (:HTS_OPT_COMPRESSION_LEVEL).
# NOTE(review): symbols without an explicit integer presumably continue from the
# previous value, so the order of entries is significant — confirm against FFI docs.
HtsFmtOption =
enum(
  :CRAM_OPT_DECODE_MD,
  :CRAM_OPT_PREFIX,
  :CRAM_OPT_VERBOSITY,   # obsolete, use hts_set_log_level() instead
  :CRAM_OPT_SEQS_PER_SLICE,
  :CRAM_OPT_SLICES_PER_CONTAINER,
  :CRAM_OPT_RANGE,
  :CRAM_OPT_VERSION,     # rename to :CRAM_version?
  :CRAM_OPT_EMBED_REF,
  :CRAM_OPT_IGNORE_MD5,
  :CRAM_OPT_REFERENCE,   # make general
  :CRAM_OPT_MULTI_SEQ_PER_SLICE,
  :CRAM_OPT_NO_REF,
  :CRAM_OPT_USE_BZIP2,
  :CRAM_OPT_SHARED_REF,
  :CRAM_OPT_NTHREADS,    # deprecated, use HTS_OPT_NTHREADS
  :CRAM_OPT_THREAD_POOL, # make general
  :CRAM_OPT_USE_LZMA,
  :CRAM_OPT_USE_RANS,
  :CRAM_OPT_REQUIRED_FIELDS,
  :CRAM_OPT_LOSSY_NAMES,
  :CRAM_OPT_BASES_PER_SLICE,
  :CRAM_OPT_STORE_MD,
  :CRAM_OPT_STORE_NM,
  :CRAM_OPT_RANGE_NOSEEK, # CRAM_OPT_RANGE minus the seek
  # General purpose
  :HTS_OPT_COMPRESSION_LEVEL, 100,
  :HTS_OPT_NTHREADS,
  :HTS_OPT_THREAD_POOL,
  :HTS_OPT_CACHE_SIZE,
  :HTS_OPT_BLOCK_SIZE
)
BamHdr =
SamHdr
SamFile =
HtsFile
FaiFormatOptions =

faidx

enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
HtsTpoolProcess =

FIXME: struct

:pointer
HtsTpoolResult =
:pointer

Class Method Summary collapse

Class Method Details

.attach_function(*) ⇒ Object



15
16
17
18
19
# File 'lib/hts/libhts.rb', line 15

# Wrapper around the inherited attach_function that tolerates missing symbols:
# all arguments are forwarded unchanged to `super`, and an FFI::NotFoundError
# is turned into a warning (its message) instead of propagating.
#
# @return the result of `super`, or the result of `warn` when the lookup failed
def self.attach_function(*)
  begin
    super
  rescue FFI::NotFoundError => not_found
    warn not_found.message
  end
end

.bam_cigar_gen(l, o) ⇒ Object



38
39
40
# File 'lib/hts/libhts/sam.rb', line 38

# Pack a CIGAR element into a single integer: `l` is shifted left by
# BAM_CIGAR_SHIFT bits and `o` is OR-ed into the result.
#
# @param l value placed in the upper bits (read back by bam_cigar_oplen)
# @param o value placed in the lower bits (read back by bam_cigar_op)
# @return the packed value
def bam_cigar_gen(l, o)
  upper_bits = l << BAM_CIGAR_SHIFT
  upper_bits | o
end

.bam_cigar_op(c) ⇒ Object



26
27
28
# File 'lib/hts/libhts/sam.rb', line 26

# Extract the operation code from a packed CIGAR value by masking it with
# BAM_CIGAR_MASK.
#
# @param c packed CIGAR value
# @return the masked low bits of `c`
def bam_cigar_op(c)
  op_mask = BAM_CIGAR_MASK
  c & op_mask
end

.bam_cigar_opchr(c) ⇒ Object



34
35
36
# File 'lib/hts/libhts/sam.rb', line 34

# Look up the character for the operation code of a packed CIGAR value.
# The lookup table is BAM_CIGAR_STR followed by six "?" characters, which is
# what codes past the end of BAM_CIGAR_STR resolve to.
#
# @param c packed CIGAR value
# @return one-character String (or nil if the index falls outside the table)
def bam_cigar_opchr(c)
  op_table = "#{BAM_CIGAR_STR}??????"
  op_table[bam_cigar_op(c)]
end

.bam_cigar_oplen(c) ⇒ Object



30
31
32
# File 'lib/hts/libhts/sam.rb', line 30

# Extract the length part of a packed CIGAR value by shifting it right by
# BAM_CIGAR_SHIFT bits.
#
# @param c packed CIGAR value
# @return `c` with the low BAM_CIGAR_SHIFT bits dropped
def bam_cigar_oplen(c)
  shift_width = BAM_CIGAR_SHIFT
  c >> shift_width
end

.bam_cigar_type(o) ⇒ Object



42
43
44
# File 'lib/hts/libhts/sam.rb', line 42

# Read the two-bit entry for operation code `o` out of BAM_CIGAR_TYPE, which
# is used as a packed table with two bits per operation.
#
# @param o operation code (index into the table)
# @return Integer in 0..3
def bam_cigar_type(o)
  bit_offset = o << 1 # two bits per entry
  (BAM_CIGAR_TYPE >> bit_offset) & 3
end

.bam_get_aux(b) ⇒ Object



87
88
89
# File 'lib/hts/libhts/sam.rb', line 87

# Compute the position inside b[:data] that lies past four regions whose
# sizes come from b[:core]: l_qname, n_cigar * 4, (l_qseq + 1) / 2 and l_qseq.
#
# @param b record exposing [:data] and [:core]
# @return b[:data] advanced by the combined size of those regions
def bam_get_aux(b)
  core = b[:core]
  seq_len = core[:l_qseq]
  cigar_bytes = core[:n_cigar] << 2      # n_cigar * 4
  half_seq_bytes = (seq_len + 1) >> 1    # (l_qseq + 1) / 2, rounded down
  b[:data] + cigar_bytes + core[:l_qname] + half_seq_bytes + seq_len
end

.bam_get_cigar(b) ⇒ Object



75
76
77
# File 'lib/hts/libhts/sam.rb', line 75

# Compute the position inside b[:data] that lies l_qname past its start.
#
# @param b record exposing [:data] and [:core]
# @return b[:data] advanced by b[:core][:l_qname]
def bam_get_cigar(b)
  qname_len = b[:core][:l_qname]
  b[:data] + qname_len
end

.bam_get_l_aux(b) ⇒ Object



91
92
93
# File 'lib/hts/libhts/sam.rb', line 91

# Compute how much of b[:l_data] remains after subtracting four region sizes
# taken from b[:core]: n_cigar * 4, l_qname, l_qseq and (l_qseq + 1) / 2.
#
# @param b record exposing [:l_data] and [:core]
# @return the remaining length
def bam_get_l_aux(b)
  core = b[:core]
  seq_len = core[:l_qseq]
  cigar_bytes = core[:n_cigar] << 2      # n_cigar * 4
  half_seq_bytes = (seq_len + 1) >> 1    # (l_qseq + 1) / 2, rounded down
  b[:l_data] - cigar_bytes - core[:l_qname] - seq_len - half_seq_bytes
end

.bam_get_qname(b) ⇒ Object



71
72
73
# File 'lib/hts/libhts/sam.rb', line 71

# Return the start of the record's data area, b[:data], unchanged.
#
# @param b record exposing [:data]
# @return b[:data]
def bam_get_qname(b)
  data_start = b[:data]
  data_start
end

.bam_get_qual(b) ⇒ Object



83
84
85
# File 'lib/hts/libhts/sam.rb', line 83

# Compute the position inside b[:data] that lies past three regions whose
# sizes come from b[:core]: n_cigar * 4, l_qname and (l_qseq + 1) / 2.
#
# @param b record exposing [:data] and [:core]
# @return b[:data] advanced by the combined size of those regions
def bam_get_qual(b)
  core = b[:core]
  cigar_bytes = core[:n_cigar] << 2            # n_cigar * 4
  half_seq_bytes = (core[:l_qseq] + 1) >> 1    # (l_qseq + 1) / 2, rounded down
  b[:data] + cigar_bytes + core[:l_qname] + half_seq_bytes
end

.bam_get_seq(b) ⇒ Object



79
80
81
# File 'lib/hts/libhts/sam.rb', line 79

# Compute the position inside b[:data] that lies past two regions whose sizes
# come from b[:core]: n_cigar * 4 and l_qname.
#
# @param b record exposing [:data] and [:core]
# @return b[:data] advanced by the combined size of those regions
def bam_get_seq(b)
  core = b[:core]
  cigar_bytes = core[:n_cigar] << 2 # n_cigar * 4
  b[:data] + cigar_bytes + core[:l_qname]
end

.bam_is_mrev(b) ⇒ Object



67
68
69
# File 'lib/hts/libhts/sam.rb', line 67

# Tell whether the BAM_FMREVERSE bit is set in the record's flag word.
#
# @param b record exposing [:core][:flag]
# @return [Boolean] true when the bit is set
def bam_is_mrev(b)
  masked_flag = b[:core][:flag] & BAM_FMREVERSE
  masked_flag != 0
end

.bam_is_rev(b) ⇒ Object



63
64
65
# File 'lib/hts/libhts/sam.rb', line 63

# Tell whether the BAM_FREVERSE bit is set in the record's flag word.
#
# @param b record exposing [:core][:flag]
# @return [Boolean] true when the bit is set
def bam_is_rev(b)
  masked_flag = b[:core][:flag] & BAM_FREVERSE
  masked_flag != 0
end

.bam_seqi(s, i) ⇒ Object



95
96
97
# File 'lib/hts/libhts/sam.rb', line 95

# Read the i-th 4-bit value from a sequence stored two values per byte.
# The byte is s[i / 2], read via #read_uint8; an even `i` selects its high
# nibble and an odd `i` its low nibble.
#
# @param s indexable object whose elements respond to #read_uint8
# @param i zero-based index of the 4-bit value
# @return Integer in 0..15
def bam_seqi(s, i)
  packed_byte = s[i >> 1].read_uint8
  nibble_shift = (~i & 1) << 2 # 4 when i is even, 0 when i is odd
  (packed_byte >> nibble_shift) & 0xf
end

.bcf_alleles2gt(a, b) ⇒ Object



155
156
157
# File 'lib/hts/libhts/vcf.rb', line 155

# Map a pair of allele indices to a single index. With hi the larger and lo
# the smaller of the two, the result is hi * (hi + 1) / 2 + lo, so the order
# of the arguments does not matter.
#
# @param a first allele index
# @param b second allele index
# @return the combined index
def bcf_alleles2gt(a, b)
  if a > b
    a * (a + 1) / 2 + b
  else
    b * (b + 1) / 2 + a
  end
end

.bcf_get_format_char(hdr, line, tag, dst, ndst) ⇒ Object



183
184
185
# File 'lib/hts/libhts/vcf.rb', line 183

# Convenience form of bcf_get_format_values that fixes the value type to
# BCF_HT_STR; every other argument is passed through unchanged.
#
# @return the result of bcf_get_format_values
def bcf_get_format_char(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_STR
  bcf_get_format_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_format_float(hdr, line, tag, dst, ndst) ⇒ Object



179
180
181
# File 'lib/hts/libhts/vcf.rb', line 179

# Convenience form of bcf_get_format_values that fixes the value type to
# BCF_HT_REAL; every other argument is passed through unchanged.
#
# @return the result of bcf_get_format_values
def bcf_get_format_float(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_REAL
  bcf_get_format_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_format_int32(hdr, line, tag, dst, ndst) ⇒ Object



175
176
177
# File 'lib/hts/libhts/vcf.rb', line 175

# Convenience form of bcf_get_format_values that fixes the value type to
# BCF_HT_INT; every other argument is passed through unchanged.
#
# @return the result of bcf_get_format_values
def bcf_get_format_int32(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_INT
  bcf_get_format_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_genotypes(hdr, line, dst, ndst) ⇒ Object



187
188
189
# File 'lib/hts/libhts/vcf.rb', line 187

# Convenience form of bcf_get_format_values for the "GT" tag: the tag is fixed
# to "GT" and the value type to BCF_HT_INT; the other arguments pass through.
#
# @return the result of bcf_get_format_values
def bcf_get_genotypes(hdr, line, dst, ndst)
  value_type = BCF_HT_INT
  bcf_get_format_values(hdr, line, "GT", dst, ndst, value_type)
end

.bcf_get_info_flag(hdr, line, tag, dst, ndst) ⇒ Object



171
172
173
# File 'lib/hts/libhts/vcf.rb', line 171

# Convenience form of bcf_get_info_values that fixes the value type to
# BCF_HT_FLAG; every other argument is passed through unchanged.
#
# @return the result of bcf_get_info_values
def bcf_get_info_flag(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_FLAG
  bcf_get_info_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_info_float(hdr, line, tag, dst, ndst) ⇒ Object



163
164
165
# File 'lib/hts/libhts/vcf.rb', line 163

# Convenience form of bcf_get_info_values that fixes the value type to
# BCF_HT_REAL; every other argument is passed through unchanged.
#
# @return the result of bcf_get_info_values
def bcf_get_info_float(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_REAL
  bcf_get_info_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_info_int32(hdr, line, tag, dst, ndst) ⇒ Object



159
160
161
# File 'lib/hts/libhts/vcf.rb', line 159

# Convenience form of bcf_get_info_values that fixes the value type to
# BCF_HT_INT; every other argument is passed through unchanged.
#
# @return the result of bcf_get_info_values
def bcf_get_info_int32(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_INT
  bcf_get_info_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_get_info_string(hdr, line, tag, dst, ndst) ⇒ Object



167
168
169
# File 'lib/hts/libhts/vcf.rb', line 167

# Convenience form of bcf_get_info_values that fixes the value type to
# BCF_HT_STR; every other argument is passed through unchanged.
#
# @return the result of bcf_get_info_values
def bcf_get_info_string(hdr, line, tag, dst, ndst)
  value_type = BCF_HT_STR
  bcf_get_info_values(hdr, line, tag, dst, ndst, value_type)
end

.bcf_gt_allele(val) ⇒ Object



151
152
153
# File 'lib/hts/libhts/vcf.rb', line 151

# Decode the allele index from an encoded genotype value: drop the lowest bit,
# then subtract one. This inverts bcf_gt_unphased / bcf_gt_phased.
#
# @param val encoded genotype value
# @return the allele index (-1 when val >> 1 is zero)
def bcf_gt_allele(val)
  without_low_bit = val >> 1
  without_low_bit - 1
end

.bcf_gt_is_missing(val) ⇒ Object



143
144
145
# File 'lib/hts/libhts/vcf.rb', line 143

# Report whether an encoded genotype value denotes a missing allele, i.e.
# whether everything above its lowest bit is zero.
#
# The previous body, `val >> 1 ? 0 : 1`, was a literal port of the C macro.
# In Ruby every Integer (including 0) is truthy, so it always returned 0.
# The zero test is now explicit.
#
# The result stays an Integer to match the sibling bcf_gt_is_phased. Compare
# it with 1 rather than using it directly as a condition, because 0 is truthy.
#
# @param val [Integer] encoded genotype value
# @return [Integer] 1 when val >> 1 is zero, otherwise 0
def bcf_gt_is_missing(val)
  (val >> 1).zero? ? 1 : 0
end

.bcf_gt_is_phased(idx) ⇒ Object



147
148
149
# File 'lib/hts/libhts/vcf.rb', line 147

# Return the lowest bit of an encoded genotype value, which is the bit that
# bcf_gt_phased sets. The result is an Integer, not a boolean.
#
# @param idx encoded genotype value
# @return [Integer] 1 when the lowest bit is set, otherwise 0
def bcf_gt_is_phased(idx)
  low_bit = idx & 1
  low_bit
end

.bcf_gt_missing ⇒ Object



139
140
141
# File 'lib/hts/libhts/vcf.rb', line 139

# Return the constant 0.
# NOTE(review): going by the name, this is presumably the encoded genotype
# value for a missing allele (bcf_gt_allele(0) yields -1) — confirm against htslib.
#
# @return [Integer] always 0
def bcf_gt_missing
  0
end

.bcf_gt_phased(idx) ⇒ Object



131
132
133
# File 'lib/hts/libhts/vcf.rb', line 131

# Encode an allele index as a genotype value with the lowest bit set:
# ((idx + 1) << 1) | 1.
#
# @param idx allele index
# @return the encoded value
def bcf_gt_phased(idx)
  shifted = (idx + 1) << 1
  shifted | 1
end

.bcf_gt_unphased(idx) ⇒ Object



135
136
137
# File 'lib/hts/libhts/vcf.rb', line 135

# Encode an allele index as a genotype value with the lowest bit clear:
# (idx + 1) << 1.
#
# @param idx allele index
# @return the encoded value
def bcf_gt_unphased(idx)
  one_based = idx + 1
  one_based << 1
end

.bcf_hdr_id2length(hdr, type, int_id) ⇒ Object



211
212
213
214
215
216
# File 'lib/hts/libhts/vcf.rb', line 211

# Read bits 8..11 of the info word for a header ID.
#
# The BcfIdpair entry is located by pointer arithmetic: the base pointer of
# the BCF_DT_ID dictionary plus int_id times the size of one BcfIdpair.
# No bounds check is performed on int_id.
#
# @param hdr header exposing [:id]
# @param type index into the entry's [:val][:info] array
# @param int_id zero-based entry index within the BCF_DT_ID dictionary
# @return Integer in 0..15
def bcf_hdr_id2length(hdr, type, int_id)
  entry_offset = LibHTS::BcfIdpair.size * int_id
  entry_ptr = hdr[:id][LibHTS::BCF_DT_ID].to_ptr + entry_offset
  info_word = LibHTS::BcfIdpair.new(entry_ptr)[:val][:info][type]
  (info_word >> 8) & 0xf
end

.bcf_hdr_id2name(hdr, rid) ⇒ Object



202
203
204
205
206
207
208
209
# File 'lib/hts/libhts/vcf.rb', line 202

# Look up the key of entry `rid` in the header's BCF_DT_CTG dictionary.
#
# The BcfIdpair entry is located by pointer arithmetic: the dictionary's base
# pointer plus rid times the size of one BcfIdpair.
#
# @param hdr header exposing #null?, [:n] and [:id]
# @param rid zero-based entry index
# @return the entry's [:key], or nil when hdr is null or rid is outside
#   0...hdr[:n][BCF_DT_CTG]
def bcf_hdr_id2name(hdr, rid)
  return nil if hdr.null?
  return nil if rid < 0
  return nil if rid >= hdr[:n][LibHTS::BCF_DT_CTG]

  entry_offset = LibHTS::BcfIdpair.size * rid
  entry_ptr = hdr[:id][LibHTS::BCF_DT_CTG].to_ptr + entry_offset
  LibHTS::BcfIdpair.new(entry_ptr)[:key]
end

.bcf_hdr_id2number(hdr, type, int_id) ⇒ Object



218
219
220
221
222
223
# File 'lib/hts/libhts/vcf.rb', line 218

# Read the part of the info word above bit 11 for a header ID.
#
# The BcfIdpair entry is located by pointer arithmetic: the base pointer of
# the BCF_DT_ID dictionary plus int_id times the size of one BcfIdpair.
# No bounds check is performed on int_id.
#
# @param hdr header exposing [:id]
# @param type index into the entry's [:val][:info] array
# @param int_id zero-based entry index within the BCF_DT_ID dictionary
# @return the info word shifted right by 12 bits
def bcf_hdr_id2number(hdr, type, int_id)
  entry_offset = LibHTS::BcfIdpair.size * int_id
  entry_ptr = hdr[:id][LibHTS::BCF_DT_ID].to_ptr + entry_offset
  info_word = LibHTS::BcfIdpair.new(entry_ptr)[:val][:info][type]
  info_word >> 12
end

.bcf_hdr_int2id(hdr, type, int_id) ⇒ Object



191
192
193
194
195
196
# File 'lib/hts/libhts/vcf.rb', line 191

# Look up the key of entry `int_id` in the header dictionary selected by `type`.
#
# The BcfIdpair entry is located by pointer arithmetic: the dictionary's base
# pointer plus int_id times the size of one BcfIdpair. No bounds check is
# performed on int_id.
#
# @param hdr header exposing [:id]
# @param type dictionary selector (index into hdr[:id])
# @param int_id zero-based entry index
# @return the entry's [:key]
def bcf_hdr_int2id(hdr, type, int_id)
  entry_offset = LibHTS::BcfIdpair.size * int_id
  entry_ptr = hdr[:id][type].to_ptr + entry_offset
  LibHTS::BcfIdpair.new(entry_ptr)[:key]
end

.bcf_hdr_name2id(hdr, id) ⇒ Object



198
199
200
# File 'lib/hts/libhts/vcf.rb', line 198

# Convenience form of bcf_hdr_id2int that fixes the dictionary to BCF_DT_CTG.
#
# @param hdr header passed through to bcf_hdr_id2int
# @param id identifier passed through to bcf_hdr_id2int
# @return the result of bcf_hdr_id2int
def bcf_hdr_name2id(hdr, id)
  dictionary = BCF_DT_CTG
  bcf_hdr_id2int(hdr, dictionary, id)
end

.bcf_hdr_nsamples(hdr) ⇒ Object



92
93
94
# File 'lib/hts/libhts/vcf.rb', line 92

# Return the BCF_DT_SAMPLE slot of the header's [:n] array.
#
# @param hdr header exposing [:n]
# @return hdr[:n][BCF_DT_SAMPLE]
def bcf_hdr_nsamples(hdr)
  counts = hdr[:n]
  counts[BCF_DT_SAMPLE]
end

.bcf_update_format_char(hdr, line, key, values, n) ⇒ Object



122
123
124
125
# File 'lib/hts/libhts/vcf.rb', line 122

# Convenience form of bcf_update_format that fixes the value type to
# BCF_HT_STR; every other argument is passed through unchanged.
#
# @return the result of bcf_update_format
def bcf_update_format_char(hdr, line, key, values, n)
  value_type = BCF_HT_STR
  bcf_update_format(hdr, line, key, values, n, value_type)
end

.bcf_update_format_float(hdr, line, key, values, n) ⇒ Object



117
118
119
120
# File 'lib/hts/libhts/vcf.rb', line 117

# Convenience form of bcf_update_format that fixes the value type to
# BCF_HT_REAL; every other argument is passed through unchanged.
#
# @return the result of bcf_update_format
def bcf_update_format_float(hdr, line, key, values, n)
  value_type = BCF_HT_REAL
  bcf_update_format(hdr, line, key, values, n, value_type)
end

.bcf_update_format_int32(hdr, line, key, values, n) ⇒ Object



112
113
114
115
# File 'lib/hts/libhts/vcf.rb', line 112

# Convenience form of bcf_update_format that fixes the value type to
# BCF_HT_INT; every other argument is passed through unchanged.
#
# @return the result of bcf_update_format
def bcf_update_format_int32(hdr, line, key, values, n)
  value_type = BCF_HT_INT
  bcf_update_format(hdr, line, key, values, n, value_type)
end

.bcf_update_genotypes(hdr, line, gts, n) ⇒ Object



127
128
129
# File 'lib/hts/libhts/vcf.rb', line 127

# Convenience form of bcf_update_format for the "GT" key: the key is fixed to
# "GT" and the value type to BCF_HT_INT; the other arguments pass through.
#
# @return the result of bcf_update_format
def bcf_update_genotypes(hdr, line, gts, n)
  value_type = BCF_HT_INT
  bcf_update_format(hdr, line, "GT", gts, n, value_type)
end

.bcf_update_info_flag(hdr, line, key, string, n) ⇒ Object



104
105
106
# File 'lib/hts/libhts/vcf.rb', line 104

# Convenience form of bcf_update_info that fixes the value type to
# BCF_HT_FLAG; every other argument is passed through unchanged.
#
# @return the result of bcf_update_info
def bcf_update_info_flag(hdr, line, key, string, n)
  value_type = BCF_HT_FLAG
  bcf_update_info(hdr, line, key, string, n, value_type)
end

.bcf_update_info_float(hdr, line, key, values, n) ⇒ Object



100
101
102
# File 'lib/hts/libhts/vcf.rb', line 100

# Convenience form of bcf_update_info that fixes the value type to
# BCF_HT_REAL; every other argument is passed through unchanged.
#
# @return the result of bcf_update_info
def bcf_update_info_float(hdr, line, key, values, n)
  value_type = BCF_HT_REAL
  bcf_update_info(hdr, line, key, values, n, value_type)
end

.bcf_update_info_int32(hdr, line, key, values, n) ⇒ Object



96
97
98
# File 'lib/hts/libhts/vcf.rb', line 96

# Convenience form of bcf_update_info that fixes the value type to
# BCF_HT_INT; every other argument is passed through unchanged.
#
# @return the result of bcf_update_info
def bcf_update_info_int32(hdr, line, key, values, n)
  value_type = BCF_HT_INT
  bcf_update_info(hdr, line, key, values, n, value_type)
end

.bcf_update_info_string(hdr, line, key, string) ⇒ Object



108
109
110
# File 'lib/hts/libhts/vcf.rb', line 108

# Convenience form of bcf_update_info for a single string value: the count is
# fixed to 1 and the value type to BCF_HT_STR; the other arguments pass through.
#
# @return the result of bcf_update_info
def bcf_update_info_string(hdr, line, key, string)
  value_count = 1
  value_type = BCF_HT_STR
  bcf_update_info(hdr, line, key, string, value_count, value_type)
end

.sam_itr_next(htsfp, itr, r) ⇒ Object

Get the next read from a SAM/BAM/CRAM iterator



346
347
348
349
350
351
# File 'lib/hts/libhts/sam_funcs.rb', line 346

# Get the next read from a SAM/BAM/CRAM iterator.
#
# A null iterator is reported through hts_log_error and rejected with -2, the
# value htslib's inline `sam_itr_next` returns in that case. Previously the
# error was logged but the null iterator was still handed to hts_itr_next.
#
# @param htsfp file handle; its [:fp][:bgzf] member and the handle itself are
#   passed to hts_itr_next
# @param itr iterator; must respond to #null?
# @param r record argument passed through to hts_itr_next
# @return -2 when `itr` is null, otherwise the result of hts_itr_next
def self.sam_itr_next(htsfp, itr, r)
  # FIXME: check if htsfp is compressed BGZF
  if itr.null?
    hts_log_error("Null iterator")
    return -2
  end

  # FIXME: check multi
  hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
end