Class: Lbp::File

Inherits:
Object
  • Object
show all
Defined in:
lib/lbp/file.rb

Overview

class should be renamed to Transcription

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filepath, transcription_type, confighash) ⇒ File

Returns a new instance of File.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/lbp/file.rb', line 12

# Builds a wrapper around one transcription XML file.
#
# @param filepath [String] location of the XML file; stored in @file_path
# @param transcription_type [String] "critical" or "documentary"; use
#   #transcription_type_from_file to read the type from the file instead
# @param confighash [Hash, nil] configuration; when it is not nil its
#   :stylesheets and :xslt_base entries are read
def initialize(filepath, transcription_type, confighash)
  @file_path = filepath
  @confighash = confighash
  has_config = !confighash.nil?

  # Stylesheet lookup table; only available when a config hash was supplied.
  @stylesheets = @confighash[:stylesheets] if has_config

  # Transcription type as given by the caller.
  @transcription_type = transcription_type

  # The XSLT version is taken from the schema version declared in the XML file.
  @xslt_version = self.validating_schema_version

  # Proper XSLT directory: <xslt_base><schema version>/<transcription type>/
  if has_config
    @xslt_dir = "#{@confighash[:xslt_base]}#{@xslt_version}/#{@transcription_type}/"
  end
end

Instance Attribute Details

#file_path ⇒ Object (readonly)

Returns the value of attribute file_path.



10
11
12
# File 'lib/lbp/file.rb', line 10

# Reader for @file_path, which #initialize sets from its filepath argument.
def file_path
  @file_path
end

#xslt_dir ⇒ Object (readonly)

Returns the value of attribute xslt_dir.



10
11
12
# File 'lib/lbp/file.rb', line 10

# Reader for @xslt_dir. #initialize only assigns it when a config hash was
# given, so this is nil for instances built without one.
def xslt_dir
  @xslt_dir
end

Instance Method Details

#author ⇒ Object



78
79
80
81
82
# File 'lib/lbp/file.rb', line 78

# Author recorded in the TEI header.
#
# @return the text of the tei:author node(s) matched under the first titleStmt
def author
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  author_nodes = self.nokogiri.xpath(
    "/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author",
    tei_ns
  )
  author_nodes.text
end

#ed_date ⇒ Object



93
94
95
96
97
# File 'lib/lbp/file.rb', line 93

# Edition date recorded in the TEI header.
#
# @return the value of the @when attribute on the first edition's date, or
#   nil when the document has no such attribute (previously this case raised
#   NoMethodError because at_xpath returned nil)
def ed_date
  xmldoc = self.nokogiri
  ed_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
  # at_xpath yields nil when nothing matches; do not dereference it.
  ed_date && ed_date.value
end

#ed_no ⇒ Object



88
89
90
91
92
# File 'lib/lbp/file.rb', line 88

# Edition number recorded in the TEI header.
#
# @return the value of the @n attribute on the first edition element, or nil
#   when the document has no such attribute (previously this case raised
#   NoMethodError because at_xpath returned nil)
def ed_no
  xmldoc = self.nokogiri
  ed_no = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
  # at_xpath yields nil when nothing matches; do not dereference it.
  ed_no && ed_no.value
end

#editor ⇒ Object



83
84
85
86
87
# File 'lib/lbp/file.rb', line 83

# Editor recorded in the TEI header.
#
# @return the text of the tei:editor node(s) matched under the first titleStmt
def editor
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  editor_nodes = self.nokogiri.xpath(
    "/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor",
    tei_ns
  )
  editor_nodes.text
end

#encoding_location ⇒ Object



112
113
114
115
116
# File 'lib/lbp/file.rb', line 112

# Variant-encoding location declared in the TEI header.
#
# @return the value of variantEncoding/@location, or nil when the document
#   declares none (previously this case raised NoMethodError because
#   at_xpath returned nil)
def encoding_location
  xmldoc = self.nokogiri
  encoding_location = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location", 'tei' => 'http://www.tei-c.org/ns/1.0')
  # at_xpath yields nil when nothing matches; do not dereference it.
  encoding_location && encoding_location.value
end

#encoding_method ⇒ Object



107
108
109
110
111
# File 'lib/lbp/file.rb', line 107

# Variant-encoding method declared in the TEI header.
#
# @return the value of variantEncoding/@method, or nil when the document
#   declares none (previously this case raised NoMethodError because
#   at_xpath returned nil)
def encoding_method
  xmldoc = self.nokogiri
  encoding_method = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method", 'tei' => 'http://www.tei-c.org/ns/1.0')
  # at_xpath yields nil when nothing matches; do not dereference it.
  encoding_method && encoding_method.value
end

#file ⇒ Object



33
34
35
36
37
38
39
# File 'lib/lbp/file.rb', line 33

# Opens the transcription source named by #file_path.
#
# When the opened handle reports a base_uri that differs from the requested
# path, the resource is opened again with HTTP basic authentication taken from
# @confighash[:git_username] / @confighash[:git_password].
# NOTE(review): the mismatch presumably means the request was redirected
# (e.g. to a login page) — confirm against the hosting service.
#
# Handles without a base_uri (plain local files) are returned as they are;
# previously they raised NoMethodError.
#
# NOTE(review): a non-URL path still ends up in Kernel#open, which runs a
# subprocess for paths starting with "|". Do not pass untrusted paths.
#
# @return an open IO-like handle; the caller is responsible for closing it
def file
  path = self.file_path

  # URI.open (open-uri) is the entry point that still opens URLs on Ruby 3+,
  # where Kernel#open no longer does; older setups fall back to Kernel#open.
  opener =
    if defined?(URI) && URI.respond_to?(:open)
      URI.method(:open)
    else
      method(:open)
    end

  handle = opener.call(path)
  needs_auth = handle.respond_to?(:base_uri) && handle.base_uri.to_s != path
  return handle unless needs_auth

  # Release the first response before retrying with credentials.
  handle.close
  credentials = [@confighash[:git_username], @confighash[:git_password]]
  opener.call(path, { :http_basic_authentication => credentials })
end

#nokogiri ⇒ Object



40
41
42
# File 'lib/lbp/file.rb', line 40

# Parses the transcription source into a Nokogiri XML document.
#
# Every call opens the source again through #file and parses it from scratch.
# The handle is closed once parsing has finished; previously it was left open.
#
# @return the document produced by Nokogiri::XML
def nokogiri
  source = self.file
  begin
    Nokogiri::XML(source)
  ensure
    # The document is fully built by the time Nokogiri::XML returns, so the
    # handle is no longer needed.
    source.close if source.respond_to?(:close)
  end
end

#number_of_columns ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/lbp/file.rb', line 117

# Column layout of the transcribed witness.
#
# Critical transcriptions have no column count, so nil is returned without
# opening or parsing the file. Otherwise the document is searched for page
# breaks (tei:pb) and then column breaks (tei:cb).
# NOTE(review): a document containing both pb and cb reports 1 because pb is
# checked first — confirm this matches the encoding guidelines.
#
# @return [Integer, nil] 1 when any tei:pb exists, 2 when only tei:cb exists,
#   nil for critical transcriptions or when neither element is present
def number_of_columns
  return nil if @transcription_type == "critical"

  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  xmldoc = self.nokogiri

  if !xmldoc.xpath("//tei:pb", tei_ns).empty?
    1
  elsif !xmldoc.xpath("//tei:cb", tei_ns).empty?
    2
  end
end

#pub_date ⇒ Object



98
99
100
101
102
103
104
105
106
# File 'lib/lbp/file.rb', line 98

# Publication date recorded in the TEI header.
#
# @return [String, nil] the literal "no pub date in this schema" when the
#   validating schema version is "1.0.0"; otherwise the value of
#   publicationStmt/date/@when, or nil when that attribute is absent
#   (previously the absent case raised NoMethodError)
def pub_date
  return "no pub date in this schema" if self.validating_schema_version == "1.0.0"

  xmldoc = self.nokogiri
  pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
  # at_xpath yields nil when nothing matches; do not dereference it.
  pub_date && pub_date.value
end

#title ⇒ Object

Item Header Extraction and Metadata Methods



73
74
75
76
77
# File 'lib/lbp/file.rb', line 73

# Title recorded in the TEI header.
#
# @return the text of the first tei:title under the first titleStmt
def title
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  title_nodes = self.nokogiri.xpath(
    "/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]",
    tei_ns
  )
  title_nodes.text
end

#transcription_type ⇒ Object



69
70
71
# File 'lib/lbp/file.rb', line 69

# Transcription type supplied to the constructor.
#
# The body was empty and therefore always returned nil, even though
# #initialize stores the value in @transcription_type. Use
# #transcription_type_from_file to read the type from the XML instead.
#
# @return the transcription_type argument given to #initialize
def transcription_type
  @transcription_type
end

#transcription_type_from_file ⇒ Object

Get transcription type



46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/lbp/file.rb', line 46

# Reads the transcription type from the XML file itself.
#
# @return [String] the string form of the @type attribute on the first
#   tei:text element, or "unknown" when the query matches nothing
def transcription_type_from_file
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  type_attrs = self.nokogiri.xpath("/tei:TEI/tei:text[1]/@type", tei_ns)

  type_attrs.length > 0 ? type_attrs.to_s : "unknown"
end

#transform(xsltfile, xslt_param_array = []) ⇒ Object

Begin transform (XSLT) methods ###



131
132
133
# File 'lib/lbp/file.rb', line 131

# Runs the given stylesheet over the freshly parsed document via the
# xslt_transform helper.
#
# @param xsltfile stylesheet to use, passed through to xslt_transform
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever xslt_transform returns
def transform(xsltfile, xslt_param_array=[])
  parsed = self.nokogiri
  xslt_transform(parsed, xsltfile, xslt_param_array)
end

#transform_apply(xsltfile, xslt_param_array = []) ⇒ Object



134
135
136
# File 'lib/lbp/file.rb', line 134

# Applies the given stylesheet to the freshly parsed document via the
# xslt_apply_to helper.
#
# @param xsltfile stylesheet to use, passed through to xslt_apply_to
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever xslt_apply_to returns
def transform_apply(xsltfile, xslt_param_array=[])
  parsed = self.nokogiri
  xslt_apply_to(parsed, xsltfile, xslt_param_array)
end

#transform_clean(xslt_param_array = []) ⇒ Object



145
146
147
148
# File 'lib/lbp/file.rb', line 145

# Applies the configured :clean_view stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_clean(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:clean_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#transform_clean_nokogiri(xslt_param_array = []) ⇒ Object



149
150
151
152
# File 'lib/lbp/file.rb', line 149

# Runs the configured :clean_view stylesheet through #transform (rather than
# #transform_apply).
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform returns
def transform_clean_nokogiri(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:clean_view]
  self.transform(stylesheet_path, xslt_param_array)
end

#transform_index_view(xslt_param_array = []) ⇒ Object



141
142
143
144
# File 'lib/lbp/file.rb', line 141

# Applies the configured :index_view stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_index_view(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:index_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#transform_json(xslt_param_array = []) ⇒ Object



161
162
163
164
# File 'lib/lbp/file.rb', line 161

# Applies the configured :json stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_json(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:json]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#transform_main_view(xslt_param_array = []) ⇒ Object



137
138
139
140
# File 'lib/lbp/file.rb', line 137

# Applies the configured :main_view stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_main_view(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:main_view]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#transform_plain_text(xslt_param_array = []) ⇒ Object



153
154
155
156
# File 'lib/lbp/file.rb', line 153

# Applies the configured :plain_text stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_plain_text(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:plain_text]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#transform_plain_text_nokogiri(xslt_param_array = []) ⇒ Object



157
158
159
160
# File 'lib/lbp/file.rb', line 157

# Runs the configured :plain_text stylesheet through #transform (rather than
# #transform_apply).
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform returns
def transform_plain_text_nokogiri(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:plain_text]
  self.transform(stylesheet_path, xslt_param_array)
end

#transform_toc(xslt_param_array = []) ⇒ Object



165
166
167
168
# File 'lib/lbp/file.rb', line 165

# Applies the configured :toc stylesheet through #transform_apply.
#
# @param xslt_param_array [Array] stylesheet parameters, passed through as given
# @return whatever #transform_apply returns
def transform_toc(xslt_param_array=[])
  stylesheet_path = @xslt_dir + @stylesheets[:toc]
  self.transform_apply(stylesheet_path, xslt_param_array)
end

#validating_schema_version ⇒ Object

get validating schema label



59
60
61
62
63
64
65
66
67
# File 'lib/lbp/file.rb', line 59

# Reads the validating schema label from the TEI header and extracts its
# version part.
#
# @return the segment after the last "-" of schemaRef/@n (the whole label
#   when it contains no "-"), or "default" when the document has no such
#   attribute
def validating_schema_version
  tei_ns = { 'tei' => 'http://www.tei-c.org/ns/1.0' }
  schema_label = self.nokogiri.xpath(
    "/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:schemaRef[1]/@n",
    tei_ns
  )
  return "default" unless schema_label.length > 0

  schema_label.to_s.split("-").last
end

#word_array ⇒ Object



175
176
177
178
179
# File 'lib/lbp/file.rb', line 175

# Words of the plain-text rendering, lower-cased.
#
# @return [Array<String>] the whitespace-separated tokens of
#   #transform_plain_text, each downcased, in document order
def word_array
  self.transform_plain_text.split.map(&:downcase)
end

#word_count ⇒ Object

End of Transformation Methods ### Begin Statistics Methods ###



171
172
173
174
# File 'lib/lbp/file.rb', line 171

# Number of words in the plain-text rendering.
#
# @return [Integer] count of whitespace-separated tokens in
#   #transform_plain_text
def word_count
  self.transform_plain_text.split.length
end

#word_frequency(sort, order) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/lbp/file.rb', line 180

# Counts how often each word from #word_array occurs.
#
# @param sort [String] "frequency" to order by count, "word" to order by the
#   word itself
# @param order [String] "ascending" (low to high / a - z) or "descending"
#   (high to low / z - a)
# @return [Hash] word => count. When sort or order is not one of the values
#   above, the counts are returned unsorted, in first-seen order.
def word_frequency(sort, order)
  counts = Hash.new(0)
  self.word_array.each { |word| counts[word] += 1 }

  known_sort = sort == "frequency" || sort == "word"
  known_order = order == "ascending" || order == "descending"
  return counts.to_h unless known_sort && known_order

  ranked =
    if sort == "frequency"
      counts.sort_by { |_word, count| count }
    else
      counts.sort_by { |word, _count| word }
    end
  ranked = ranked.reverse if order == "descending"
  ranked.to_h
end