Skip to content

Commit 9ffd0e4

Browse files
committed
Fixes #588, fixes #564, changes #568, relates #550 - Only filter out known problematic characters
The filter cannot be too aggressive, as some fonts contain private use area code points, etc., and expect them to be output.
1 parent ed2bd9c commit 9ffd0e4

File tree

4 files changed

+39
-14
lines changed

4 files changed

+39
-14
lines changed

openhtmltopdf-core/src/main/java/com/openhtmltopdf/extend/TextRenderer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,15 @@ public interface TextRenderer {
3535
*
3636
* @param input The string can be null
3737
* @return The cleaned string or <code>null</code> if the input is null
38-
* @see com.openhtmltopdf.util.OpenUtil#isCodePointPrintable(int)
38+
* @see com.openhtmltopdf.util.OpenUtil#isSafeFontCodePointToPrint(int)
3939
*/
40-
static String getEffectivePrintableString(String input) {
40+
public static String getEffectivePrintableString(String input) {
4141
if (input == null || input.isEmpty() || areAllCharactersPrintable(input)) {
4242
return input;
4343
}
4444

4545
StringBuilder effective = new StringBuilder(input.length());
46-
input.codePoints().filter(OpenUtil::isCodePointPrintable).forEach(effective::appendCodePoint);
46+
input.codePoints().filter(OpenUtil::isSafeFontCodePointToPrint).forEach(effective::appendCodePoint);
4747

4848
return effective.toString();
4949
}

openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ private OpenUtil() {}
1010
* Checks if a code point is printable. If false, it can be safely discarded at the
1111
* rendering stage, else it should be replaced with the replacement character,
1212
* if a suitable glyph can not be found.
13+
*
14+
* NOTE: This should only be called after a character has been shown to be
15+
* NOT present in the font. It cannot be called beforehand because some fonts
16+
* contain private use area characters and so on. See issue #588.
17+
*
1318
* @param codePoint
1419
* @return whether codePoint is printable
1520
*/
@@ -26,14 +31,31 @@ public static boolean isCodePointPrintable(int codePoint) {
2631
category == Character.SURROGATE);
2732
}
2833

34+
/**
35+
* Whether the code point should be passed through to the font
36+
* for rendering. It effectively filters out characters that
37+
* have been shown to be problematic in some (broken) fonts such
38+
* as visible soft-hyphens.
39+
*/
40+
public static boolean isSafeFontCodePointToPrint(int codePoint) {
41+
switch (codePoint) {
42+
case 0xAD: // Soft hyphen, PR#550, FALLTHRU
43+
case 0xFFFC: // Object replacement character, Issue#564.
44+
return false;
45+
46+
default:
47+
return true;
48+
}
49+
}
50+
2951
/**
3052
* Returns <code>true</code> when every code point of the given string is safe to pass to a font for printing.
3153
* @param str a non-null string to test
3254
* @return whether every code point satisfies {@link #isSafeFontCodePointToPrint(int)}
3355
*/
3456
public static boolean areAllCharactersPrintable(String str) {
3557
Objects.requireNonNull(str, "str");
36-
return str.codePoints().allMatch(OpenUtil::isCodePointPrintable);
58+
return str.codePoints().allMatch(OpenUtil::isSafeFontCodePointToPrint);
3759
}
3860

3961
public static Integer parseIntegerOrNull(String possibleInteger) {

openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxFastOutputDevice.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import com.openhtmltopdf.simple.extend.ReplacedElementScaleHelper;
4444
import com.openhtmltopdf.util.ArrayUtil;
4545
import com.openhtmltopdf.util.LogMessageId;
46-
import com.openhtmltopdf.util.OpenUtil;
4746
import com.openhtmltopdf.util.XRLog;
4847
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2D;
4948
import de.rototor.pdfbox.graphics2d.PdfBoxGraphics2DFontTextDrawer;
@@ -74,10 +73,6 @@
7473
import java.util.Map.Entry;
7574
import java.util.logging.Level;
7675
import java.util.regex.Pattern;
77-
import java.util.stream.IntStream;
78-
import java.util.stream.StreamSupport;
79-
80-
import static com.openhtmltopdf.util.OpenUtil.areAllCharactersPrintable;
8176

8277
public class PdfBoxFastOutputDevice extends AbstractOutputDevice implements OutputDevice, PdfBoxOutputDevice {
8378
//

openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.util.ArrayList;
2424
import java.util.List;
2525
import java.util.logging.Level;
26-
import java.util.stream.IntStream;
2726

2827
import com.openhtmltopdf.bidi.BidiReorderer;
2928
import com.openhtmltopdf.extend.FontContext;
@@ -35,11 +34,10 @@
3534
import com.openhtmltopdf.render.FSFontMetrics;
3635
import com.openhtmltopdf.render.JustificationInfo;
3736
import com.openhtmltopdf.util.LogMessageId;
37+
import com.openhtmltopdf.util.OpenUtil;
3838
import com.openhtmltopdf.util.ThreadCtx;
3939
import com.openhtmltopdf.util.XRLog;
4040

41-
import static com.openhtmltopdf.util.OpenUtil.isCodePointPrintable;
42-
4341
public class PdfBoxTextRenderer implements TextRenderer {
4442
private static float TEXT_MEASURING_DELTA = 0.01f;
4543

@@ -194,7 +192,9 @@ public static List<FontRun> divideIntoFontRuns(FSFont font, String str, BidiReor
194192
i += Character.charCount(unicode);
195193
String ch = String.valueOf(Character.toChars(unicode));
196194

197-
if (!isCodePointPrintable(unicode)) {
195+
if (!OpenUtil.isSafeFontCodePointToPrint(unicode)) {
196+
// Filter out characters that should never be visible (such
197+
// as soft-hyphen) but are in some fonts.
198198
continue;
199199
}
200200

@@ -264,8 +264,14 @@ else if (des != current.des) {
264264
}
265265
}
266266
}
267-
267+
268268
if (!gotChar) {
269+
if (!OpenUtil.isCodePointPrintable(unicode)) {
270+
// Filter out control characters, etc., when they
271+
// are not present in any font.
272+
continue;
273+
}
274+
269275
// We still don't have the character after all that. So use replacement character.
270276
if (current.des == null) {
271277
// First character of run.
@@ -366,6 +372,8 @@ public int getSmoothingLevel() {
366372
return 0;
367373
}
368374

375+
@Deprecated
376+
@Override
369377
public void setSmoothingLevel(int level) {
370378
}
371379

0 commit comments

Comments
 (0)