Class: Lbp::File
- Inherits:
-
Object
- Object
- Lbp::File
- Defined in:
- lib/lbp/file.rb
Overview
class should be renamed to Transcription
Instance Attribute Summary collapse
-
#file_path ⇒ Object
readonly
Returns the value of attribute file_path.
-
#xslt_dir ⇒ Object
readonly
Returns the value of attribute xslt_dir.
Instance Method Summary collapse
- #author ⇒ Object
- #ed_date ⇒ Object
- #ed_no ⇒ Object
- #editor ⇒ Object
- #encoding_location ⇒ Object
- #encoding_method ⇒ Object
- #file ⇒ Object
-
#initialize(filepath, transcription_type, confighash) ⇒ File
constructor
A new instance of File.
- #nokogiri ⇒ Object
- #number_of_columns ⇒ Object
- #pub_date ⇒ Object
-
#title ⇒ Object
Item Header Extraction and Metadata Methods.
- #transcription_type ⇒ Object
-
#transcription_type_from_file ⇒ Object
Get transcription type.
-
#transform(xsltfile, xslt_param_array = []) ⇒ Object
Begin transform (XSLT) methods ###.
- #transform_apply(xsltfile, xslt_param_array = []) ⇒ Object
- #transform_clean(xslt_param_array = []) ⇒ Object
- #transform_clean_nokogiri(xslt_param_array = []) ⇒ Object
- #transform_index_view(xslt_param_array = []) ⇒ Object
- #transform_json(xslt_param_array = []) ⇒ Object
- #transform_main_view(xslt_param_array = []) ⇒ Object
- #transform_plain_text(xslt_param_array = []) ⇒ Object
- #transform_plain_text_nokogiri(xslt_param_array = []) ⇒ Object
- #transform_toc(xslt_param_array = []) ⇒ Object
-
#validating_schema_version ⇒ Object
get validating schema label.
- #word_array ⇒ Object
-
#word_count ⇒ Object
End of Transformation Methods ### Begin Statistics Methods ###.
- #word_frequency(sort, order) ⇒ Object
Constructor Details
#initialize(filepath, transcription_type, confighash) ⇒ File
Returns a new instance of File.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/lbp/file.rb', line 12
# Sets up the object's state from the given path, transcription type and
# configuration.
#
# @param filepath [Object] stored unchanged in @file_path
# @param transcription_type [Object] stored unchanged in @transcription_type
# @param confighash [Hash, nil] stored in @confighash; when nil, neither
#   @stylesheets nor @xslt_dir is assigned
def initialize(filepath, transcription_type, confighash)
  @file_path = filepath
  @confighash = confighash
  config_given = !confighash.nil?

  # Stylesheet names come straight from the configuration.
  @stylesheets = @confighash[:stylesheets] if config_given

  # The type is taken from the caller; transcription_type_from_file reads it
  # from the document instead.
  @transcription_type = transcription_type

  # The schema version is read from the document itself.
  @xslt_version = self.validating_schema_version

  if config_given
    @xslt_dir = "#{@confighash[:xslt_base]}#{@xslt_version}/#{@transcription_type}/"
  end
end
Instance Attribute Details
#file_path ⇒ Object (readonly)
Returns the value of attribute file_path.
10 11 12 |
# File 'lib/lbp/file.rb', line 10
# Reader for the path given to the constructor.
#
# @return [Object] the current value of @file_path
def file_path
  @file_path
end
#xslt_dir ⇒ Object (readonly)
Returns the value of attribute xslt_dir.
10 11 12 |
# File 'lib/lbp/file.rb', line 10
# Reader for the XSLT directory computed by the constructor.
#
# @return [Object] the current value of @xslt_dir (nil when never assigned)
def xslt_dir
  @xslt_dir
end
Instance Method Details
#author ⇒ Object
78 79 80 81 82 |
# File 'lib/lbp/file.rb', line 78
# Reads the author from the TEI header's title statement.
#
# @return [String] the text of the node set matched by the tei:author XPath
def author
  xmldoc = self.nokogiri
  author = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author", 'tei' => 'http://www.tei-c.org/ns/1.0')
  return author.text
end
#ed_date ⇒ Object
93 94 95 96 97 |
# File 'lib/lbp/file.rb', line 93
# Reads the @when attribute of the first edition date in the TEI header.
#
# NOTE(review): there is no nil check on the at_xpath result, so a document
# without this attribute presumably fails on `.value` — confirm callers
# expect that.
#
# @return [Object] the attribute's value
def ed_date
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  when_attr = self.nokogiri.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when", tei_ns)
  when_attr.value
end
#ed_no ⇒ Object
88 89 90 91 92 |
# File 'lib/lbp/file.rb', line 88
# Reads the @n attribute of the first edition element in the TEI header.
#
# NOTE(review): there is no nil check on the at_xpath result, so a document
# without this attribute presumably fails on `.value` — confirm callers
# expect that.
#
# @return [Object] the attribute's value
def ed_no
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  n_attr = self.nokogiri.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n", tei_ns)
  n_attr.value
end
#editor ⇒ Object
83 84 85 86 87 |
# File 'lib/lbp/file.rb', line 83
# Reads the editor from the TEI header's title statement.
#
# @return [String] the text of the node set matched by the tei:editor XPath
def editor
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  editor_nodes = self.nokogiri.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor", tei_ns)
  editor_nodes.text
end
#encoding_location ⇒ Object
112 113 114 115 116 |
# File 'lib/lbp/file.rb', line 112
# Reads the @location attribute of the first variantEncoding element in the
# TEI header's encoding description.
#
# NOTE(review): there is no nil check on the at_xpath result, so a document
# without this attribute presumably fails on `.value` — confirm callers
# expect that.
#
# @return [Object] the attribute's value
def encoding_location
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  location_attr = self.nokogiri.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location", tei_ns)
  location_attr.value
end
#encoding_method ⇒ Object
107 108 109 110 111 |
# File 'lib/lbp/file.rb', line 107
# Reads the @method attribute of the first variantEncoding element in the
# TEI header's encoding description.
#
# NOTE(review): there is no nil check on the at_xpath result, so a document
# without this attribute presumably fails on `.value` — confirm callers
# expect that.
#
# @return [Object] the attribute's value
def encoding_method
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  method_attr = self.nokogiri.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method", tei_ns)
  method_attr.value
end
#file ⇒ Object
33 34 35 36 37 38 39 |
# File 'lib/lbp/file.rb', line 33
# Opens the resource at file_path and returns the open handle.
#
# If the handle's base_uri differs from file_path, the resource is opened a
# second time using HTTP basic authentication with the :git_username and
# :git_password entries of @confighash. The first handle is closed before the
# retry so it is not leaked.
#
# NOTE(review): presumably a differing base_uri means the request was
# redirected (e.g. to a login page) — confirm against the hosting service.
# NOTE(review): Kernel#open on a caller-supplied path can also spawn a
# subprocess for paths starting with "|"; confirm file_path is trusted.
#
# @return [Object] the handle returned by open; the caller must close it
def file
  file = open(self.file_path)
  if file.base_uri.to_s != self.file_path
    # Release the unauthenticated handle before replacing it.
    file.close
    file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password] ]})
  end
  return file
end
#nokogiri ⇒ Object
40 41 42 |
# File 'lib/lbp/file.rb', line 40
# Parses the resource returned by #file into a Nokogiri XML document.
#
# The source handle is closed once parsing has finished so that repeated
# calls do not accumulate open handles.
#
# @return [Object] the result of Nokogiri::XML
def nokogiri
  source = self.file
  Nokogiri::XML(source)
ensure
  # Only close things that are closable and still open.
  source.close if source.respond_to?(:close) && !source.closed?
end
#number_of_columns ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/lbp/file.rb', line 117
# Reports a column count based on which break elements the document contains.
#
# Returns nil for "critical" transcriptions without parsing the document,
# since the result does not depend on its content in that case. Otherwise
# returns 1 when any tei:pb element is present, 2 when there is no tei:pb but
# at least one tei:cb, and nil when neither is present.
#
# NOTE(review): tei:pb is tested before tei:cb, so a document containing both
# reports 1 — confirm this ordering is intended.
#
# @return [Integer, nil]
def number_of_columns
  return nil if @transcription_type == "critical"

  xmldoc = self.nokogiri
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }

  if xmldoc.xpath("//tei:pb", tei_ns).count != 0
    1
  elsif xmldoc.xpath("//tei:cb", tei_ns).count != 0
    2
  end
end
#pub_date ⇒ Object
98 99 100 101 102 103 104 105 106 |
# File 'lib/lbp/file.rb', line 98
# Reads the @when attribute of the first publication date in the TEI header.
#
# When validating_schema_version is "1.0.0" the document is not queried and a
# fixed message string is returned instead.
#
# NOTE(review): there is no nil check on the at_xpath result, so a document
# without this attribute presumably fails on `.value` — confirm callers
# expect that.
#
# @return [Object] the attribute's value, or the fixed message string
def pub_date
  return "no pub date in this schema" if self.validating_schema_version == "1.0.0"

  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  when_attr = self.nokogiri.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", tei_ns)
  when_attr.value
end
#title ⇒ Object
Item Header Extraction and Metadata Methods
73 74 75 76 77 |
# File 'lib/lbp/file.rb', line 73
# Reads the first title from the TEI header's title statement.
#
# @return [String] the text of the node set matched by the tei:title XPath
def title
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  title_nodes = self.nokogiri.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]", tei_ns)
  title_nodes.text
end
#transcription_type ⇒ Object
69 70 71 |
# File 'lib/lbp/file.rb', line 69
# Returns the transcription type that was passed to the constructor.
#
# The method body was previously empty and always returned nil, even though
# the constructor stores the value in @transcription_type.
#
# @return [Object] the current value of @transcription_type
def transcription_type
  @transcription_type
end
#transcription_type_from_file ⇒ Object
Get transcription type
46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/lbp/file.rb', line 46
# Reads the transcription type from the @type attribute of the document's
# first tei:text element.
#
# @return [String] the matched node set rendered with to_s, or "unknown" when
#   the XPath matches nothing
def transcription_type_from_file
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  type_attr = self.nokogiri.xpath("/tei:TEI/tei:text[1]/@type", tei_ns)
  return "unknown" unless type_attr.length > 0

  type_attr.to_s
end
#transform(xsltfile, xslt_param_array = []) ⇒ Object
Begin transform (XSLT) methods ###
131 132 133 |
# File 'lib/lbp/file.rb', line 131
# Passes the parsed document, the stylesheet and the parameters to
# xslt_transform and returns its result.
#
# @param xsltfile [Object] stylesheet, passed through unchanged
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever xslt_transform returns
def transform(xsltfile, xslt_param_array=[])
  source_doc = self.nokogiri
  xslt_transform(source_doc, xsltfile, xslt_param_array)
end
#transform_apply(xsltfile, xslt_param_array = []) ⇒ Object
134 135 136 |
# File 'lib/lbp/file.rb', line 134
# Passes the parsed document, the stylesheet and the parameters to
# xslt_apply_to and returns its result.
#
# @param xsltfile [Object] stylesheet, passed through unchanged
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever xslt_apply_to returns
def transform_apply(xsltfile, xslt_param_array=[])
  source_doc = self.nokogiri
  xslt_apply_to(source_doc, xsltfile, xslt_param_array)
end
#transform_clean(xslt_param_array = []) ⇒ Object
145 146 147 148 |
# File 'lib/lbp/file.rb', line 145
# Runs transform_apply with the stylesheet configured under :clean_view.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:clean_view].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_clean(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:clean_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#transform_clean_nokogiri(xslt_param_array = []) ⇒ Object
149 150 151 152 |
# File 'lib/lbp/file.rb', line 149
# Runs transform (rather than transform_apply) with the stylesheet configured
# under :clean_view.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:clean_view].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform returns
def transform_clean_nokogiri(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:clean_view]
  self.transform(stylesheet_path, xslt_param_array)
end
#transform_index_view(xslt_param_array = []) ⇒ Object
141 142 143 144 |
# File 'lib/lbp/file.rb', line 141
# Runs transform_apply with the stylesheet configured under :index_view.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:index_view].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_index_view(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:index_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#transform_json(xslt_param_array = []) ⇒ Object
161 162 163 164 |
# File 'lib/lbp/file.rb', line 161
# Runs transform_apply with the stylesheet configured under :json.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:json].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_json(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:json]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#transform_main_view(xslt_param_array = []) ⇒ Object
137 138 139 140 |
# File 'lib/lbp/file.rb', line 137
# Runs transform_apply with the stylesheet configured under :main_view.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:main_view].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_main_view(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:main_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#transform_plain_text(xslt_param_array = []) ⇒ Object
153 154 155 156 |
# File 'lib/lbp/file.rb', line 153
# Runs transform_apply with the stylesheet configured under :plain_text.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:plain_text].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_plain_text(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:plain_text]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#transform_plain_text_nokogiri(xslt_param_array = []) ⇒ Object
157 158 159 160 |
# File 'lib/lbp/file.rb', line 157
# Runs transform (rather than transform_apply) with the stylesheet configured
# under :plain_text.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:plain_text].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform returns
def transform_plain_text_nokogiri(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:plain_text]
  self.transform(stylesheet_path, xslt_param_array)
end
#transform_toc(xslt_param_array = []) ⇒ Object
165 166 167 168 |
# File 'lib/lbp/file.rb', line 165
# Runs transform_apply with the stylesheet configured under :toc.
#
# The stylesheet path is @xslt_dir followed by @stylesheets[:toc].
#
# @param xslt_param_array [Array] parameters, passed through unchanged
# @return [Object] whatever transform_apply returns
def transform_toc(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:toc]
  self.transform_apply(stylesheet_path, xslt_param_array)
end
#validating_schema_version ⇒ Object
get validating schema label
59 60 61 62 63 64 65 66 67 |
# File 'lib/lbp/file.rb', line 59
# Derives the schema version label from the @n attribute of the first
# tei:schemaRef element in the TEI header's encoding description.
#
# The attribute text is split on "-" and the last piece is returned.
#
# @return [String] the last "-"-separated piece, or "default" when the XPath
#   matches nothing
def validating_schema_version
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  schema_label = self.nokogiri.xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:schemaRef[1]/@n", tei_ns)
  return "default" unless schema_label.length > 0

  schema_label.to_s.split("-").last
end
#word_array ⇒ Object
175 176 177 178 179 |
# File 'lib/lbp/file.rb', line 175
# Splits the output of transform_plain_text on whitespace and downcases
# every piece.
#
# @return [Array] the downcased words in their original order
def word_array
  self.transform_plain_text.split.map { |token| token.downcase }
end
#word_count ⇒ Object
End of Transformation Methods ### Begin Statistics Methods ###
171 172 173 174 |
# File 'lib/lbp/file.rb', line 171
# Counts the whitespace-separated pieces in the output of
# transform_plain_text.
#
# @return [Integer] number of pieces produced by split
def word_count
  tokens = self.transform_plain_text.split
  tokens.size
end
#word_frequency(sort, order) ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# File 'lib/lbp/file.rb', line 180
# Counts how often each entry of word_array occurs and optionally orders the
# result.
#
# Sorting happens only when sort is "frequency" or "word" AND order is
# "ascending" or "descending"; any other combination returns the counts in
# first-seen order.
#
# @param sort [String] "frequency" to order by count, "word" to order by word
# @param order [String] "ascending" or "descending"
# @return [Hash] word => occurrence count
def word_frequency(sort, order)
  counts = Hash.new(0)
  self.word_array.each { |token| counts[token] += 1 }

  # Position inside each [word, count] pair to sort on.
  key_index =
    case sort
    when "frequency" then 1
    when "word" then 0
    end

  if key_index && (order == "descending" || order == "ascending")
    pairs = counts.sort_by { |pair| pair[key_index] }
    pairs = pairs.reverse if order == "descending"
    counts = pairs
  end

  counts.to_h
end