Top Level Namespace

Defined Under Namespace

Modules: Registerbekanntmachungen

Instance Method Summary collapse

Instance Method Details

#file_name(date) ⇒ Object



40
41
42
# File 'lib/registerbekanntmachungen.rb', line 40

# Builds the relative path of the per-day JSON file for the given date.
#
# @param date [#strftime] the day the file belongs to (e.g. a Date)
# @return [String] a path shaped like
#   "db/YYYY-MM/registerbekanntmachungen-YYYY-MM-DD.json"
def file_name(date)
  month_dir = date.strftime('%Y-%m')
  day_stamp = date.strftime('%Y-%m-%d')
  "db/#{month_dir}/registerbekanntmachungen-#{day_stamp}.json"
end

#get_detailed_announcement(datum, id, view_state, cookies) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/registerbekanntmachungen/parser.rb', line 139

# Sends a partial-AJAX form POST to the announcements page on
# handelsregister.de and returns the unparsed response body.
#
# @param datum value sent as the 'datum' form field
# @param id value sent as the 'id' form field
# @param view_state value sent as 'javax.faces.ViewState'
# @param cookies value sent in the 'Cookie' request header
# @return the body of the HTTP response, exactly as Net::HTTP delivers it
def get_detailed_announcement(datum, id, view_state, cookies)
  endpoint = URI('https://www.handelsregister.de/rp_web/xhtml/bekanntmachungen.xhtml')

  request_headers = {
    'Cookie' => cookies,
    'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
    'Faces-Request' => 'partial/ajax',
    'User-Agent' => 'Your User Agent'
  }

  # Form fields of the request; extend here if the server needs more.
  form_fields = {
    'javax.faces.partial.ajax' => 'true',
    'javax.faces.source' => 'bekanntMachungenForm:j_idt114',
    'javax.faces.partial.execute' => 'bekanntMachungenForm',
    'javax.faces.partial.render' => 'bekanntMachungenForm',
    'bekanntMachungenForm:j_idt114' => 'bekanntMachungenForm:j_idt114',
    'datum' => datum,
    'id' => id,
    'bekanntMachungenForm' => 'bekanntMachungenForm',
    'javax.faces.ViewState' => view_state
  }

  post = Net::HTTP::Post.new(endpoint.request_uri, request_headers)
  post.set_form_data(form_fields)

  # HTTPS connection to the endpoint host
  client = Net::HTTP.new(endpoint.host, endpoint.port)
  client.use_ssl = true

  # The caller is responsible for parsing the body.
  client.request(post).body
end

#oldest_unsaved_date ⇒ Object

Determines the oldest date within the last 8 weeks (excluding today) for which no JSON file has been saved yet.



26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/registerbekanntmachungen.rb', line 26

# Finds the earliest day in the last 8 weeks (up to and including yesterday)
# whose JSON file does not exist yet under "db/".
#
# @return [Date, nil] the oldest day without a file, or nil when every day
#   in the window already has one
def oldest_unsaved_date
  # Window: 56 days ago through yesterday
  window_start = Date.today - 7 * 8
  window_end = Date.today - 1

  # Ranges iterate in ascending order, so the first hit is the oldest gap.
  (window_start..window_end).find do |day|
    !File.exist?("db/#{day.strftime('%Y-%m')}/registerbekanntmachungen-#{day.strftime('%Y-%m-%d')}.json")
  end
end

#parse_announcement(lines, onclick) ⇒ Object

Parse the announcement details from the given text lines and onclick attribute

<a id="bekanntMachungenForm:datalistId:0:j_idt116:2:j_idt117" href="#" class="ui-commandlink ui-widget" onclick="fireBekanntmachung2('Sat Nov 30 00:00:00 CET 2024', '95064');;PrimeFaces.ab({s:&quot;bekanntMachungenForm:datalistId:0:j_idt116:2:j_idt117&quot;,f:&quot;bekanntMachungenForm&quot;});return false;">

<label id="bekanntMachungenForm:datalistId:0:j_idt116:2:j_idt118" class="ui-outputlabel ui-widget">
  Cancellation announcement under the Transformation Act <br>
  Bavaria District court Regensburg HRB 16226 <br>
  Bachner Holding GmbH
</label>
</a>



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/registerbekanntmachungen/parser.rb', line 46

# Parses one announcement list entry into a hash of structured fields.
#
# The caller's +lines+ array is never modified; a leading "– gelöscht –"
# marker line is skipped on a copy.
#
# @param lines [Array<String>] text lines of the entry: type, court/register
#   line, then company line (for "Sonderregisterbekanntmachung" entries:
#   type, court line, reference, company name)
# @param onclick [String] onclick attribute of the entry's link, e.g.
#   "fireBekanntmachung2('Sat Nov 30 00:00:00 CET 2024', '95064');..."
# @return [Hash] always contains :id, :original_text, :type, :state,
#   :amtsgericht and :company_name. Regular entries add :registernummer,
#   :registerart, :company_seat and, when present, :former_amtsgericht.
#   "Sonderregisterbekanntmachung" entries add :sonderegister_referenz.
# @raise [RuntimeError] if the second line of a
#   "Sonderregisterbekanntmachung" entry has no "<state> Amtsgericht <court>" shape
def parse_announcement(lines, onclick)
  # Extract the ID from the onclick attribute, e.g.
  # fireBekanntmachung2('Sat Nov 30 00:00:00 CET 2024', '95064');
  id = ''
  id_match = onclick.match(/fireBekanntmachung\d+\('[^']*',\s*'(?<id>\d+)'\);/)
  if id_match
    id = id_match[:id]
  elsif @verbose
    # Best effort: a missing ID is only reported, the entry is still parsed.
    puts "Failed to extract ID from the onclick attribute: #{onclick}"
  end

  # Skip a leading deletion marker. `drop` returns a new array, so the
  # caller's array is left untouched (and frozen arrays are accepted).
  lines = lines.drop(1) if lines[0] == "– gelöscht –"

  type = lines[0]
  line2 = lines[1]

  if type.start_with?("Sonderregisterbekanntmachung")
    court_match = line2.match(/^(?<state>.*?)\s+Amtsgericht\s+(?<court>.*?)$/)
    raise "Failed to parse the second line '#{line2}': #{lines.inspect}" unless court_match

    return {
      id: id,
      original_text: lines.join("\n"),
      type: type,
      state: court_match[:state],
      amtsgericht: "Amtsgericht #{court_match[:court]}",
      company_name: lines[3],
      sonderegister_referenz: lines[2]
    }
  end

  # Register fields stay nil when the second line does not match; the raw
  # line remains available through :original_text.
  state = amtsgericht = registernummer = registerart = former_amtsgericht = nil

  register_match = line2.match(/^(?<state>.*?)\s+Amtsgericht\s+(?<court>.*?)\s+(?<registerart>(HRA|HRB|GnR|GsR|PR|VR))\s+(?<register_number>\d+(?:\s+\w+)?)(?:\s+früher Amtsgericht\s+(?<former_court>.*))?$/)
  if register_match
    state = register_match[:state]
    amtsgericht = "Amtsgericht #{register_match[:court]}"
    registerart = register_match[:registerart]
    registernummer = "#{registerart} #{register_match[:register_number]}"
    former_amtsgericht = register_match[:former_court]
  end

  # Third line: "<company name> – <seat>"; the seat part may be absent.
  company_name, company_seat = lines[2].split('–').map(&:strip)

  announcement = {
    id: id,
    original_text: lines.join("\n"),
    type: type,
    state: state,
    amtsgericht: amtsgericht,
    registernummer: registernummer,
    registerart: registerart,
    company_name: company_name,
    company_seat: company_seat
  }

  # Include :former_amtsgericht only when it carries a non-blank value.
  unless former_amtsgericht.nil? || former_amtsgericht.strip.empty?
    announcement[:former_amtsgericht] = former_amtsgericht
  end

  announcement
end

#parse_announcement_response(response_body) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/registerbekanntmachungen/parser.rb', line 6

# Extracts the announcement text from a partial-response XML body.
#
# The text of the <update> element(s) is parsed as HTML; the text of
# '#rrbPanel_content' is used, falling back to '#srbPanel_content'
# (Sonderregisterbekanntmachung) when the former is empty.
#
# @param response_body [String] XML body of the AJAX response
# @return [String] announcement text with every line stripped and runs of
#   three or more newlines collapsed to two; empty if neither panel has text
def parse_announcement_response(response_body)
  # The HTML payload is carried as the text content of <update>.
  update_payload = Nokogiri::XML(response_body).xpath('//update').text
  fragment = Nokogiri::HTML(update_payload)

  raw_text = fragment.css('#rrbPanel_content').text.strip
  # Fall back to the Sonderregisterbekanntmachung panel
  raw_text = fragment.css('#srbPanel_content').text.strip if raw_text.empty?

  # Strip each line individually, then squeeze excess blank lines.
  stripped_lines = raw_text.split("\n").map(&:strip)
  stripped_lines.join("\n").gsub(/\n{3,}/, "\n\n")
end