Skip to content

Commit

Permalink
Merge pull request #258 from ngiger/master
Browse files Browse the repository at this point in the history
Revert updating paths for image of FI/PI
  • Loading branch information
zdavatz committed Feb 26, 2024
2 parents 2db9812 + 458171d commit 1fd1eb1
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 57 deletions.
11 changes: 6 additions & 5 deletions ext/fiparse/src/fiparse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -177,29 +177,30 @@ def parse_fachinfo_docx(path, iksnr, lang='de')
writer.format = :documed
writer.extract(Hpricot(doc.to_html(true)), :fi)
end
def parse_fachinfo_html(src, title, styles, image_folder)
def parse_fachinfo_html(src, format = :documed, title='', styles = nil)
lang = (src =~ /\/de\// ? 'de' : 'fr')
if File.exist?(src)
src = File.read src
end
writer = FachinfoHpricot.new
writer.format = :swissmedicinfo
# swissmedicinfo
writer.format = format
writer.title = title
writer.lang = lang
writer.image_folder = image_folder
writer.extract(Hpricot(src), :fi, title, styles)
end
def parse_patinfo_html(src, title, styles, image_folder)
def parse_patinfo_html(src, format=:documed, title='', styles = nil, image_folder = nil)
lang = (src =~ /\/de\// ? 'de' : 'fr')
if File.exist?(src)
src = File.read src
end
writer = PatinfoHpricot.new
writer.format = :swissmedicinfo
writer.format = format
writer.title = title
writer.lang = lang
writer.image_folder = image_folder
writer.extract(Hpricot(src), :pi, title, styles)
# swissmedicinfo
end
module_function :storage=
module_function :parse_fachinfo_docx
Expand Down
9 changes: 5 additions & 4 deletions ext/fiparse/src/textinfo_hpricot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -295,18 +295,19 @@ def handle_image(ptr, child)
if src =~ /^data:image\/(jp[e]?g|gif|png|x-[ew]mf);base64($|,)/
ptr.target.style = child[:style]
ext = $1
file_name = File.join(@image_folder || @title, "#{@image_index.to_s}.#{ext}")
folder = @image_folder || ((@title || @name).to_s[0,100])
name_base = File.basename(folder.to_s.gsub(/®/, '').gsub(/[^A-z0-9]/, '_')).strip
file_name = File.join(name_base + '_files', "#{@image_index.to_s}.#{ext}")
lang = (@lang || 'de')
end
dir = File.join('/', 'resources', 'images')
else
file_name = File.basename(child[:src].
gsub('
','').
gsub(/\?px=[0-9]*$/, '').strip)
lang = (file_name[0].upcase == 'F' ? 'fr' : 'de') unless file_name.empty?
type = (self.is_a?(ODDB::FiParse::FachinfoHpricot) ? 'fi' : 'pi')
dir = File.join('/', 'resources', 'images', type, lang)
end
type = (self.is_a?(ODDB::FiParse::FachinfoHpricot) ? 'fi' : 'pi')
dir = File.join('/', 'resources', 'images', type, lang)
ptr.target.src = File.join(dir, file_name)
end
def insert_image(ptr, child)
Expand Down
22 changes: 4 additions & 18 deletions ext/fiparse/test/test_fachinfo_hpricot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def test_more_line_breaks_in_table
</table>
)
writer = FachinfoHpricot.new
writer.image_folder = "fiImageFolder_#{__LINE__}"
code, chapter = writer.chapter(Hpricot(html).at("table"))
@lookandfeel = FlexMock.new 'lookandfeel'
@lookandfeel.should_receive(:section_style).and_return { 'section_style' }
Expand Down Expand Up @@ -720,18 +719,11 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path('data/html/de/fi_62580_novartis_seebris.de.html', File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "Seebri_Breezhaler"
open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName)
}
end

def test_parse_fachinfo_html_with_image_dir
res = FiParse::parse_fachinfo_html(@@path, titles='dummy', styles = nil, 'fiImageFolder')
# File.open('tst_fi.yaml', 'w+' ) { |out| YAML.dump(res, out, line_width: -1 )}
assert(res.to_yaml.index('/resources/images/fiImageFolder/3.png'), 'Must have image nr 3 in fiImageFolder')
end

def test_name2
assert_equal(MedicInfoName, @@fachinfo.name.to_s)
end
Expand Down Expand Up @@ -761,12 +753,12 @@ def test_firmenlogo
assert(@@fachinfo.galenic_form.to_s.index('Firmenlogo'))
assert(@@fachinfo.effects.to_s.index('(image)'), 'Wirkungen muss Bild enthalten')
assert(@@fachinfo.galenic_form.to_s.index('(image)'), 'galenic_form must have an image')
assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/5.png'), 'Must have image nr 5')
assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/4.png'), 'Must have image nr 4')
assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/3.png'), 'Must have image nr 3')
assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/5.png'), 'Must have image nr 5')
assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/4.png'), 'Must have image nr 4')
assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/3.png'), 'Must have image nr 3')

assert(@@fachinfo.galenic_form.to_s.index('(image)'), 'Zusamensetzung muss Bild enthalten')
assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/1.x-wmf'), 'Must have image nr 1')
assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/1.x-wmf'), 'Must have image nr 1')
end

def test_iksnrs
Expand Down Expand Up @@ -816,7 +808,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path('data/html/de/fi_62184_cipralex_de.html', File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"

open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Cipralex)
Expand Down Expand Up @@ -901,7 +892,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path(HtmlName, File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"

open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Isentres)
Expand Down Expand Up @@ -1000,7 +990,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path(HtmlName, File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"

open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Clexane)
Expand Down Expand Up @@ -1086,7 +1075,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path(HtmlName, File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"

open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, StylesPonstan)
Expand Down Expand Up @@ -1126,7 +1114,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path(HtmlName, File.dirname(__FILE__))
@@writer = FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"

open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Baraclude)
Expand Down Expand Up @@ -1185,7 +1172,6 @@ def setup
return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo
@@path = File.expand_path(HtmlName, File.dirname(__FILE__))
@@writer = ODDB::FiParse::FachinfoHpricot.new
@@writer.image_folder = "fiImageFolder_#{__LINE__}"
open(@@path) { |fh|
@@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, StylesCoAprovel)
}
Expand Down
58 changes: 34 additions & 24 deletions src/plugin/text_info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def initialize app, opts={:newest => true}
@news_log = File.join ODDB.config.log_dir, 'textinfos.txt'
@problematic_fi_pi = File.join ODDB.config.log_dir, 'problematic_fi_pi.lst'
@title = '' # target fi/pi name
@format = :swissmedicinfo
@target = :both
@search_term = []
# FI/PI names
Expand Down Expand Up @@ -824,11 +825,7 @@ def TextInfoPlugin.find_iksnr_in_string(string, iksnr)
end

def download_swissmedicinfo_xml(file = nil)
if file
content = IO.read(file)
LogFile.debug("Read #{content.size} bytes from #{file}")
return content
end
return IO.read(file) if file
setup_default_agent
url = "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f"
dir = File.join(ODDB.config.data_dir, 'xml')
Expand Down Expand Up @@ -910,22 +907,26 @@ def match(node_set, iksnr)
@notfound << " IKSNR-not found #{iksnr.inspect} : #{type} - #{lang.to_s}."
return name
end
def extract_images(html_file, type, lang, iksnrs, image_folder)
def extract_image(html_file, name, type, lang, iksnrs)
LogFile.debug "Extracting image to #{name}"
if html_file && File.exist?(html_file)
resource_dir = (File.join(ODDB::IMAGE_DIR, type.to_s, lang.to_s))
FileUtils.mkdir_p(resource_dir)
html = File.open(html_file, 'r:utf-8').read
if html =~ /<img\s/
images = Nokogiri::HTML(html).search('//img')
html = nil
images.each_with_index do |img, index|
name_base = File.basename(name.gsub(/®/, '').gsub(/[^A-z0-9]/, '_')).strip
dir = File.join(resource_dir, name_base + '_files')
FileUtils.mkdir_p(dir)
images.each_with_index do |img, i|
type, src = img.attributes['src'].to_s.split(',')
# next regexp must be in sync with ext/fiparse/src/textinfo_hpricot.rb
unless type =~ /^data:image\/(jp[e]?g|gif|png);base64$/
@nonconforming_content << "#{iksnrs}: '#{@title}' with non conforming #{type} element x"
end
if type =~ /^data:image\/(jp[e]?g|gif|png|x-[ew]mf);base64$/
FileUtils.mkdir_p(image_folder)
file = File.join(image_folder, "#{index + 1}.#{$1}")
LogFile.debug "Extracting #{iksnrs} image to #{file}"
file = File.join(dir, "#{i + 1}.#{$1}")
File.open(file, 'wb'){ |f| f.write(Base64.decode64(src)); f.close }
end
end
Expand Down Expand Up @@ -1237,7 +1238,7 @@ def parse_textinfo(meta_info)
reg = @app.registration(meta_info.iksnr)
if @options[:reparse]
if meta_info.authNrs && found_matching_iksnr(meta_info.authNrs)
LogFile.debug "at #{nr_uptodate}: #{type} because reparse is demanded: #{@options[:reparse]} #{meta_info.authNrs}"
LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} because reparse is demanded: #{@options[:reparse]} #{meta_info.authNrs}"
else
return
end
Expand All @@ -1246,7 +1247,7 @@ def parse_textinfo(meta_info)
html_name = res[0]
is_same_html = res[2]
unless html_name
LogFile.debug "parse_textinfo #{type}: no html_name for #{meta_info}"
LogFile.debug "parse_textinfo #{type} #{__LINE__}: no html_name for #{meta_info}"
return
end
textinfo_fi = nil
Expand All @@ -1265,40 +1266,38 @@ def parse_textinfo(meta_info)
text_info = get_textinfo(meta_info, meta_info.iksnr)

if !is_same_html
LogFile.debug "#{html_name} does is not the same: #{meta_info.authNrs}"
LogFile.debug "parse_textinfo #{__LINE__} #{html_name} does is not the same: #{meta_info.authNrs}"
elsif @options[:reparse]
LogFile.debug "reparse demanded via @options #{@options}"
LogFile.debug "parse_textinfo #{__LINE__} reparse demanded via @options #{@options}"
elsif found_matching_iksnr(meta_info.authNrs)
if meta_info.same_content_as_xml_file
type == :fi ? @up_to_date_fis += 1 : @up_to_date_pis += 1
return
end
elsif meta_info.same_content_as_xml_file
LogFile.debug "at #{nr_uptodate}: #{type} same_content_as_xml_file #{meta_info.authNrs}" if false # default casse
LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} same_content_as_xml_file #{meta_info.authNrs}" if false # default casse
type == :fi ? @up_to_date_fis += 1 : @up_to_date_pis += 1
return
end
styles = res[1]
textinfo_pi_name = nil
# image_base, image_subfolder must be in sync with ext/fiparse/src/fiparse.rb and ext/fiparse/src/textinfo_hpricot.rb
image_base = File.expand_path('./doc/resources/images')
image_subfolder = File.join(type.to_s, meta_info.lang.to_s, "#{meta_info.iksnr}_#{meta_info.title[0,10].gsub(/[^A-z0-9]/, '_')}")
image_folder = "#{meta_info.iksnr}#{meta_info.title}"[0,100]
if type == :fi
if is_same_html && !@options[:reparse] && reg && reg.fachinfo && text_info.descriptions.keys.index(meta_info.lang)
LogFile.debug "#{meta_info.iksnr} at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}"
LogFile.debug "parse_textinfo #{__LINE__} #{meta_info.iksnr} at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}"
@up_to_date_fis += 1
return
end
textinfo_fi ||= @parser.parse_fachinfo_html(html_name, meta_info.title, styles, image_subfolder)
textinfo_fi ||= @parser.parse_fachinfo_html(html_name, @format, meta_info.title, styles)
update_fachinfo_lang(meta_info, { meta_info.lang => textinfo_fi } )
elsif type == :pi
# TODO: Do we really catch all the cases when packages have different PIs?
if is_same_html && !@options[:reparse] && reg && text_info && text_info.descriptions.keys.index(meta_info.lang)
LogFile.debug "at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}"
LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}"
@up_to_date_pis += 1
return
end
textinfo_pi = @parser.parse_patinfo_html(html_name, meta_info.title, styles, image_subfolder)
textinfo_pi = @parser.parse_patinfo_html(html_name, @format, meta_info.title, styles, image_folder)
update_patinfo_lang(meta_info, { meta_info.lang => textinfo_pi } )
if textinfo_pi.respond_to?(:name)
textinfo_pi_name = textinfo_pi.name
Expand All @@ -1307,8 +1306,19 @@ def parse_textinfo(meta_info)
end
# Extract the image to the path generated from the XML title.
# This should be the "correct" path.
extract_images(html_name, meta_info.type, meta_info.lang, meta_info.authNrs, File.join(image_base, image_subfolder))
LogFile.debug "at #{nr_uptodate}: #{type} textinfo #{textinfo.to_s.split("\n")[0..2]}" if self.respond_to?(:textinfo)
extract_image(html_name, image_folder, meta_info.type, meta_info.lang, meta_info.authNrs)
# However, ODBA is unreliable: sometimes it simply fails to save objects (#231).
# There are cases in which the HTML points the image to a wrong path, and we cannot update
# the HTML because of this ODBA problem, so here we also extract the image to the path generated from the wrong H1 title.
if !textinfo_pi_name.nil?
begin
extract_image(html_name, textinfo_pi_name.to_s[0,100], meta_info.type, meta_info.lang, meta_info.authNrs)
rescue => error
LogFile.debug "#236 #{error}"
# Sometimes this fails with a "file name too long" error (#236)
end
end
LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} textinfo #{textinfo.to_s.split("\n")[0..2]}" if self.respond_to?(:textinfo)
if reg
reg.odba_store
textinfo = nil
Expand Down
2 changes: 1 addition & 1 deletion src/state/admin/patinfo_pdf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_patinfo_input(input)
newstate
end
def parse_patinfo(src)
HTML_PARSER.parse_patinfo_html(src, :documed, '', nil, 'admin')
HTML_PARSER.parse_patinfo_html(src)
rescue StandardError => e
msg = ' (' << e.message << ')'
err = create_error(:e_html_not_parsed, :html_upload, msg)
Expand Down
1 change: 0 additions & 1 deletion test/test_plugin/text_info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def setup
path_check = File.expand_path(File.join(File.dirname(__FILE__), '../../etc', 'barcode_minitest.yml'))
assert_equal(ODDB::TextInfoPlugin::Override_file, path_check)
FileUtils.rm_f(path_check, :verbose => true)
FileUtils.rm_f(File.expand_path('../data/'), :verbose => true)
pointer = flexmock 'pointer'
@aips_download = File.expand_path('../data/xml/Aips_test.xml', File.dirname(__FILE__))
latest_from = File.expand_path('../data/xlsx/Packungen-latest.xlsx', File.dirname(__FILE__))
Expand Down
10 changes: 6 additions & 4 deletions test/test_plugin/text_info_swissmedicinfo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,8 @@ def test_import_patinfo_tramal_43788
@opts[:target] = :pi
@plugin = TextInfoPlugin.new(@app, @opts)
agent = @plugin.init_agent
patinfo = Patinfo.new
# @app.create_registration('43788')
patinfo = setup_texinfo_mock(:patinfo)
@parser.should_receive(:parse_fachinfo_html).never
@parser.should_receive(:parse_patinfo_html).and_return(patinfo).at_least.once
@parser.should_receive(:parse_textinfo).never
Expand Down Expand Up @@ -488,15 +489,17 @@ def test_import_patinfo_tramal_43788
assert(@plugin.import_swissmedicinfo(@opts), 'must be able to run import_swissmedicinfo')
end
assert(File.exist?(@plugin.problematic_fi_pi), "Datei #{ @plugin.problematic_fi_pi} must exist")
path = File.join(File.dirname(__FILE__), '../../doc/resources/images/pi/de/43788_Tramal__Tr/1.png')
path = File.join(File.dirname(__FILE__), '../../doc/resources/images/pi/de/43788Tramal_Tropfen__L_sung_zum_Einnehmen_files/1.png')
assert(File.exist?(path), "Created image file #{path} must exist")
@app.registration('15219').packages.size
@app.registration('15219').packages.values.find_all { |x| x.patinfo}
@app.registration('15219').sequences.values.find_all { |x| x.patinfo}
end

def test_import_fachinfo_tramal_43788
fachinfo = Fachinfo.new
fachinfo = setup_texinfo_mock(:fachinfo)
@parser.should_receive(:parse_patinfo_html).never
@parser.should_receive(:parse_fachinfo_html).at_least.once.and_return { fachinfo }
info = { :iksnr => '43788', :title => 'Tramal, Tropfen' }
info = flexmock('info 43788')
info.should_receive(:iksnr).and_return('43788')
Expand All @@ -507,7 +510,6 @@ def test_import_fachinfo_tramal_43788
@app.registration('43788').company = Aut_43788

setup_refdata_mock
@parser.should_receive(:parse_fachinfo_html)
replace_constant('ODDB::RefdataPlugin::REFDATA_SERVER', @server) do
@opts[:target] = :fi
assert(@plugin.import_swissmedicinfo(@opts), 'must be able to run import_swissmedicinfo')
Expand Down

0 comments on commit 1fd1eb1

Please sign in to comment.