Commit 092c5d08 authored by Tom
Browse files

changed default range for line cleanup

parent 6e787002
This diff is collapsed.
......@@ -843,7 +843,7 @@ def latin_kernel(line,scale=1.0,r=1.2,debug=0):
mask[y0:bottoms[i],i] = 1
return mask
-def latin_filter(line,scale=1.0,r=1.2,debug=0):
+def latin_filter(line,scale=1.0,r=1.5,debug=0):
bin = (line>0.5*amax(line))
mask = latin_kernel(bin,scale=scale,r=r,debug=debug)
mask = psegutils.keep_marked(bin,mask)
......
......@@ -270,7 +270,10 @@ class CmodelLineRecognizer:
self.min_xheight = 10
self.max_xheight = 40
self.check_white_on_black = 1
-self.noise_threshold = 8
+# components smaller than this are removed
+self.noise_threshold = 3
+# components up to xheight*r are captured
+self.latin_r = 1.5
#self.segmenter = ocrolseg.DpSegmenter()
#self.segmenter0 = ocrolseg.SegmentLineByGCCS()
self.segmenter = lineseg.DPSegmentLine()
......@@ -314,7 +317,7 @@ class CmodelLineRecognizer:
# clean up connected components around the edges
if self.latin_cleaner:
image = 1-image
-image = common.latin_filter(image)
+image = common.latin_filter(image,r=self.latin_r)
image = common.remove_noise(image,self.noise_threshold)
image = 1-image
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment