Module: SpecID::Pep

Included in:
Bioworks::Pep, Proph::Pep, SRF::OUT::Pep, GenericPep
Defined in:
lib/spec_id.rb,
lib/spec_id/bioworks.rb

Constant Summary collapse

# Regexp matching any single character other than an uppercase letter A-Z, '.' or '-'.
Non_standard_amino_acid_char_re =
/[^A-Z\.\-]/

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#aaseq ⇒ Object

The basic amino acid sequence, without the leading or trailing ‘.’ separators or flanking amino acids. It should not contain any special symbols, etc.



544
545
546
# File 'lib/spec_id.rb', line 544

# Reader for the @aaseq instance variable.
# Documented as the bare amino acid sequence, i.e. without flanking
# residues or '.' separators.
def aaseq
  @aaseq
end

#charge ⇒ Object

Returns the value of attribute charge.



545
546
547
# File 'lib/spec_id.rb', line 545

# Reader for the @charge instance variable.
def charge
  @charge
end

#probability ⇒ Object

Returns the value of attribute probability.



537
538
539
# File 'lib/spec_id.rb', line 537

# Reader for the @probability instance variable.
def probability
  @probability
end

#prots ⇒ Object

Returns the value of attribute prots.



536
537
538
# File 'lib/spec_id.rb', line 536

# Reader for the @prots instance variable.
def prots
  @prots
end

#sequence ⇒ Object

Full sequence: (<firstAA>.<sequence>.<last>), with ‘-’ standing in for a missing first or last amino acid.



540
541
542
# File 'lib/spec_id.rb', line 540

# Reader for the @sequence instance variable.
# Documented as the full sequence in the form <firstAA>.<sequence>.<last>,
# with '-' used when there is no first or last amino acid.
def sequence
  @sequence
end

Class Method Details

.prepare_sequence(val) ⇒ Object

remove_non_amino_acids && split_sequence



554
555
556
557
# File 'lib/spec_id.rb', line 554

# Cleans +val+ with remove_non_amino_acids and hands the cleaned string to
# split_sequence. Returns whatever split_sequence returns.
def self.prepare_sequence(val)
  split_sequence(remove_non_amino_acids(val))
end

.protein_groups_by_sequence(peptide_strings_list, fasta_obj) ⇒ Object

This will rapidly determine the list of proteins to which the given peptides belong. It is meant to be low level and fast (eventually), so it asks for the data in a format amenable to this. Returns a mirror array in which each entry is an array of the Fasta::Prot objects whose sequence contains the corresponding peptide sequence.



615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
# File 'lib/spec_id.rb', line 615

# Finds, for every peptide string, the proteins whose sequence contains it.
#
# peptide_strings_list:: enumerable of peptide sequence strings
# fasta_obj::            object responding to +prots+, which yields objects
#                        responding to +aaseq+
#
# Returns an array parallel to +peptide_strings_list+; each entry is an
# array (possibly empty) of the protein objects, in their original order,
# whose +aaseq+ includes that peptide as a substring.
def self.protein_groups_by_sequence(peptide_strings_list, fasta_obj)
  # Read each protein's sequence once up front and keep it paired with its
  # protein, so no parallel index has to be maintained while scanning.
  seq_prot_pairs = fasta_obj.prots.map { |prot| [prot.aaseq, prot] }

  peptide_strings_list.map do |pep_seq|
    matching = seq_prot_pairs.select { |prot_seq, _prot| prot_seq.include?(pep_seq) }
    matching.map { |_prot_seq, prot| prot }
  end
end

.remove_non_amino_acids(sequence) ⇒ Object

Removes nonstandard characters using Non_standard_amino_acid_char_re; preserves A-Z, ‘.’ and ‘-’.



549
550
551
# File 'lib/spec_id.rb', line 549

# Returns a copy of +sequence+ with every match of
# Non_standard_amino_acid_char_re deleted.
def self.remove_non_amino_acids(sequence)
  unwanted = Non_standard_amino_acid_char_re
  sequence.gsub(unwanted, '')
end

.sequence_to_aaseq(sequence) ⇒ Object



591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
# File 'lib/spec_id.rb', line 591

# Extracts the bare peptide from +sequence+.
#
# The input is first passed through remove_non_amino_acids and then split on
# '.'. Depending on how many pieces result:
#   3 pieces -> the middle piece
#   2 pieces -> the first piece if it is longer than one character,
#               otherwise the second piece
#   1 piece  -> that piece (treated as the peptide itself)
# Any other count aborts the process with "bad peptide sequence: ...".
def self.sequence_to_aaseq(sequence)
  parts = remove_non_amino_acids(sequence).split('.')
  count = parts.size

  # Zero pieces or more than three cannot be interpreted.
  abort "bad peptide sequence: #{sequence}" unless (1..3).include?(count)

  return parts[1] if count == 3
  return parts[0] if count == 1

  # Two pieces: a multi-character first piece is taken to be the peptide;
  # otherwise the first piece is a flanking residue and the peptide follows.
  parts[0].size > 1 ? parts[0] : parts[1]
end

.split_sequence(val) ⇒ Object

Returns prev, peptide, next from sequence. Parse errors return nil,nil,nil

R.PEPTIDE.A  # -> R, PEPTIDE, A
R.PEPTIDE.-  # -> R, PEPTIDE, -
PEPTIDE.A    # -> -, PEPTIDE, A
A.PEPTIDE    # -> A, PEPTIDE, -
PEPTIDE      # -> nil,nil,nil


570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
# File 'lib/spec_id.rb', line 570

# Splits a peptide string of the form prev.PEPTIDE.next on '.' and returns
# the three-element array [prev_aa, peptide, next_aa].
#
#   R.PEPTIDE.A  # -> R, PEPTIDE, A
#   R.PEPTIDE.-  # -> R, PEPTIDE, -
#   PEPTIDE.A    # -> -, PEPTIDE, A
#   A.PEPTIDE    # -> A, PEPTIDE, -
#   PEPTIDE      # -> nil, nil, nil
#
# Anything that does not split into exactly two or three pieces is treated
# as a parse error and yields [nil, nil, nil]. This includes strings with
# four or more pieces, which previously fell through the +case+ and came
# back as three empty strings.
def self.split_sequence(val)
  pieces = val.split('.')
  case pieces.size
  when 3
    pieces
  when 2
    if pieces[0].size > 1
      # Multi-character first piece: it is the peptide, no preceding residue.
      ['-', pieces[0], pieces[1]]
    else
      # First piece is the preceding residue, no following residue.
      [pieces[0], pieces[1], '-']
    end
  else
    # 0, 1, or more than 3 pieces: parse error.
    [nil, nil, nil]
  end
end

Instance Method Details

#<=>(other) ⇒ Object



559
560
561
# File 'lib/spec_id.rb', line 559

# Compares this object with +other+ by their aaseq values and returns the
# result of that comparison.
def <=>(other)
  aaseq <=> other.aaseq
end

#mass_accuracy(pep, unit = :ppm, mono = true) ⇒ Object

units can be :mmu, :amu, :ppm



637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
# File 'lib/spec_id.rb', line 637

# Intended to report a mass accuracy for +pep+ in the requested +unit+
# (:ppm, :amu or :mmu).
#
# NOTE: unfinished. Every branch of the +case+ below is empty, so the method
# currently returns nil for any +unit+, and neither +pep+ nor +mono+ is used.
def mass_accuracy(pep, unit=:ppm, mono=true)
  # 10^6 * deltam accuracy/ m[measured]
  # i.e., theoretical mass 1000, measured 999.9: 100ppm
  # http://www.waters.com/WatersDivision/ContentD.asp?watersit=EGOO-66LRQD
  # pep.mass is the theoretical M+H of the peptide
  # this assumes that the deltacn value we're being told is correct, but I
  # have my suspicions (since the <mass> value is not accurate...)

  ######## TO COMPLETE (and add to spec_id..?)
  # Placeholder dispatch on the unit; the per-unit calculations are not written yet.
  case unit
  when :ppm
  when :amu
  when :mmu
  end
end